about summary refs log tree commit diff
diff options
context:
space:
mode:
author lana <none@none> 2012-10-30 13:56:59 -0700
committer lana <none@none> 2012-10-30 13:56:59 -0700
commit 2e4edaa1ceaedac7a1f6bd7cf6b35a7ba5a897e3 (patch)
tree fca86438a2964de41bf1953e8499d7f720aef74f
parent abcaa5a42002e568f6a15b689a10a9cccfb3f2fc (diff)
parent ee73d39351fca9aec87825da823312cf027b8a86 (diff)
-rw-r--r-- .hgtags 1
-rw-r--r-- agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java 19
-rw-r--r-- agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java 16
-rw-r--r-- agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java 45
-rw-r--r-- make/excludeSrc.make 6
-rw-r--r-- make/hotspot_version 2
-rw-r--r-- src/cpu/x86/vm/assembler_x86.cpp 97
-rw-r--r-- src/cpu/x86/vm/assembler_x86.hpp 25
-rw-r--r-- src/cpu/x86/vm/stubGenerator_x86_32.cpp 533
-rw-r--r-- src/cpu/x86/vm/stubGenerator_x86_64.cpp 552
-rw-r--r-- src/cpu/x86/vm/stubRoutines_x86_32.cpp 1
-rw-r--r-- src/cpu/x86/vm/stubRoutines_x86_32.hpp 4
-rw-r--r-- src/cpu/x86/vm/stubRoutines_x86_64.cpp 1
-rw-r--r-- src/cpu/x86/vm/stubRoutines_x86_64.hpp 5
-rw-r--r-- src/cpu/x86/vm/vm_version_x86.cpp 32
-rw-r--r-- src/cpu/x86/vm/vm_version_x86.hpp 10
-rw-r--r-- src/cpu/x86/vm/x86.ad 153
-rw-r--r-- src/os/bsd/vm/perfMemory_bsd.cpp 9
-rw-r--r-- src/os/linux/vm/perfMemory_linux.cpp 9
-rw-r--r-- src/os/solaris/vm/os_solaris.cpp 4
-rw-r--r-- src/os/solaris/vm/perfMemory_solaris.cpp 9
-rw-r--r-- src/os/windows/vm/perfMemory_windows.cpp 12
-rw-r--r-- src/share/vm/c1/c1_GraphBuilder.cpp 41
-rw-r--r-- src/share/vm/classfile/vmSymbols.hpp 16
-rw-r--r-- src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp 175
-rw-r--r-- src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp 232
-rw-r--r-- src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp 81
-rw-r--r-- src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp 12
-rw-r--r-- src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp 4
-rw-r--r-- src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp 2
-rw-r--r-- src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp 20
-rw-r--r-- src/share/vm/gc_implementation/shared/vmGCOperations.hpp 2
-rw-r--r-- src/share/vm/memory/allocation.cpp 16
-rw-r--r-- src/share/vm/memory/allocation.hpp 7
-rw-r--r-- src/share/vm/memory/binaryTreeDictionary.cpp 770
-rw-r--r-- src/share/vm/memory/binaryTreeDictionary.hpp 211
-rw-r--r-- src/share/vm/memory/filemap.cpp 20
-rw-r--r-- src/share/vm/memory/filemap.hpp 1
-rw-r--r-- src/share/vm/memory/freeBlockDictionary.cpp 5
-rw-r--r-- src/share/vm/memory/freeBlockDictionary.hpp 2
-rw-r--r-- src/share/vm/memory/freeList.cpp 100
-rw-r--r-- src/share/vm/memory/freeList.hpp 211
-rw-r--r-- src/share/vm/memory/metablock.hpp 103
-rw-r--r-- src/share/vm/memory/metachunk.hpp 133
-rw-r--r-- src/share/vm/memory/metaspace.cpp 664
-rw-r--r-- src/share/vm/memory/metaspace.hpp 6
-rw-r--r-- src/share/vm/memory/metaspaceShared.cpp 12
-rw-r--r-- src/share/vm/memory/resourceArea.hpp 20
-rw-r--r-- src/share/vm/oops/method.cpp 6
-rw-r--r-- src/share/vm/opto/c2_globals.hpp 3
-rw-r--r-- src/share/vm/opto/callGenerator.cpp 123
-rw-r--r-- src/share/vm/opto/callGenerator.hpp 3
-rw-r--r-- src/share/vm/opto/compile.cpp 2
-rw-r--r-- src/share/vm/opto/compile.hpp 2
-rw-r--r-- src/share/vm/opto/doCall.cpp 12
-rw-r--r-- src/share/vm/opto/escape.cpp 16
-rw-r--r-- src/share/vm/opto/library_call.cpp 371
-rw-r--r-- src/share/vm/opto/mulnode.cpp 41
-rw-r--r-- src/share/vm/opto/runtime.cpp 42
-rw-r--r-- src/share/vm/opto/runtime.hpp 3
-rw-r--r-- src/share/vm/opto/superword.cpp 40
-rw-r--r-- src/share/vm/opto/type.cpp 2
-rw-r--r-- src/share/vm/opto/vectornode.cpp 25
-rw-r--r-- src/share/vm/prims/unsafe.cpp 2
-rw-r--r-- src/share/vm/runtime/arguments.cpp 13
-rw-r--r-- src/share/vm/runtime/globals.hpp 6
-rw-r--r-- src/share/vm/runtime/handles.cpp 7
-rw-r--r-- src/share/vm/runtime/handles.hpp 1
-rw-r--r-- src/share/vm/runtime/handles.inline.hpp 7
-rw-r--r-- src/share/vm/runtime/os.cpp 16
-rw-r--r-- src/share/vm/runtime/stubRoutines.cpp 4
-rw-r--r-- src/share/vm/runtime/stubRoutines.hpp 10
-rw-r--r-- src/share/vm/runtime/thread.cpp 25
-rw-r--r-- src/share/vm/runtime/vmStructs.cpp 31
-rw-r--r-- src/share/vm/services/attachListener.cpp 2
-rw-r--r-- src/share/vm/services/memBaseline.cpp 231
-rw-r--r-- src/share/vm/services/memBaseline.hpp 5
-rw-r--r-- src/share/vm/services/memPtr.cpp 32
-rw-r--r-- src/share/vm/services/memPtr.hpp 107
-rw-r--r-- src/share/vm/services/memRecorder.cpp 17
-rw-r--r-- src/share/vm/services/memRecorder.hpp 1
-rw-r--r-- src/share/vm/services/memReporter.cpp 60
-rw-r--r-- src/share/vm/services/memReporter.hpp 13
-rw-r--r-- src/share/vm/services/memSnapshot.cpp 365
-rw-r--r-- src/share/vm/services/memSnapshot.hpp 134
-rw-r--r-- src/share/vm/services/memTracker.cpp 7
-rw-r--r-- src/share/vm/services/memTracker.hpp 15
-rw-r--r-- test/compiler/6340864/TestByteVect.java 215
-rw-r--r-- test/compiler/6340864/TestIntVect.java 213
-rw-r--r-- test/compiler/6340864/TestLongVect.java 212
-rw-r--r-- test/compiler/6340864/TestShortVect.java 213
-rw-r--r-- test/compiler/7184394/TestAESBase.java 154
-rw-r--r-- test/compiler/7184394/TestAESDecode.java 57
-rw-r--r-- test/compiler/7184394/TestAESEncode.java 57
-rw-r--r-- test/compiler/7184394/TestAESMain.java 57
-rw-r--r-- test/compiler/8000805/Test8000805.java 85
-rw-r--r-- test/compiler/8001183/TestCharVect.java 1332
97 files changed, 7119 insertions, 1689 deletions
diff --git a/.hgtags b/.hgtags
index 4f9539a26..e4f8bc77a 100644
--- a/.hgtags
+++ b/.hgtags
@@ -288,3 +288,4 @@ b261523fe66c40a02968f0aa7e73602491bb3386 hs25-b05
4547dc71db765276e027b0c2780b724bae0a07d3 jdk8-b61
d0337c31c8be7716369b4e7c3bd5f352983c6a06 hs25-b06
dccd40de8db1fa96f186e6179907818d75320440 jdk8-b62
+dc16fe422c535ecd4e9f80fb814a1bb9704da6f5 hs25-b07
diff --git a/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java b/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java
index 1ba20f79f..3f09ee30c 100644
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java
+++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java
@@ -272,9 +272,10 @@ public class Bytecodes {
public static final int _fast_aldc = 229;
public static final int _fast_aldc_w = 230;
public static final int _return_register_finalizer = 231;
- public static final int _shouldnotreachhere = 232; // For debugging
+ public static final int _invokehandle = 232;
+ public static final int _shouldnotreachhere = 233; // For debugging
- public static final int number_of_codes = 233;
+ public static final int number_of_codes = 234;
// Flag bits derived from format strings, can_trap, can_rewrite, etc.:
// semantic flags:
@@ -787,20 +788,22 @@ public class Bytecodes {
def(_fast_aaccess_0 , "fast_aaccess_0" , "b_JJ" , null , BasicType.getTObject() , 1, true , _aload_0 );
def(_fast_faccess_0 , "fast_faccess_0" , "b_JJ" , null , BasicType.getTObject() , 1, true , _aload_0 );
- def(_fast_iload , "fast_iload" , "bi" , null , BasicType.getTInt() , 1, false, _iload);
- def(_fast_iload2 , "fast_iload2" , "bi_i" , null , BasicType.getTInt() , 2, false, _iload);
- def(_fast_icaload , "fast_icaload" , "bi_" , null , BasicType.getTInt() , 0, false, _iload);
+ def(_fast_iload , "fast_iload" , "bi" , null , BasicType.getTInt() , 1, false, _iload );
+ def(_fast_iload2 , "fast_iload2" , "bi_i" , null , BasicType.getTInt() , 2, false, _iload );
+ def(_fast_icaload , "fast_icaload" , "bi_" , null , BasicType.getTInt() , 0, false, _iload );
// Faster method invocation.
- def(_fast_invokevfinal , "fast_invokevfinal" , "bJJ" , null , BasicType.getTIllegal(), -1, true, _invokevirtual);
+ def(_fast_invokevfinal , "fast_invokevfinal" , "bJJ" , null , BasicType.getTIllegal(), -1, true, _invokevirtual );
def(_fast_linearswitch , "fast_linearswitch" , "" , null , BasicType.getTVoid() , -1, false, _lookupswitch );
def(_fast_binaryswitch , "fast_binaryswitch" , "" , null , BasicType.getTVoid() , -1, false, _lookupswitch );
+ def(_fast_aldc , "fast_aldc" , "bj" , null , BasicType.getTObject(), 1, true, _ldc );
+ def(_fast_aldc_w , "fast_aldc_w" , "bJJ" , null , BasicType.getTObject(), 1, true, _ldc_w );
def(_return_register_finalizer, "return_register_finalizer", "b" , null , BasicType.getTVoid() , 0, true, _return );
- def(_fast_aldc , "fast_aldc" , "bj" , null , BasicType.getTObject(), 1, true, _ldc );
- def(_fast_aldc_w , "fast_aldc_w" , "bJJ" , null , BasicType.getTObject(), 1, true, _ldc_w );
+ // special handling of signature-polymorphic methods
+ def(_invokehandle , "invokehandle" , "bJJ" , null , BasicType.getTIllegal(), -1, true, _invokevirtual );
def(_shouldnotreachhere , "_shouldnotreachhere" , "b" , null , BasicType.getTVoid() , 0, false);
diff --git a/agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java b/agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java
index a8df44184..b0cc278b8 100644
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java
@@ -30,24 +30,10 @@ import sun.jvm.hotspot.utilities.PlatformInfo;
/** Encapsulates some byte-swapping operations defined in the VM */
public class Bytes {
- // swap if client platform is different from server's.
private boolean swap;
public Bytes(MachineDescription machDesc) {
- String cpu = PlatformInfo.getCPU();
- if (cpu.equals("sparc")) {
- if (machDesc.isBigEndian()) {
- swap = false;
- } else {
- swap = true;
- }
- } else { // intel
- if (machDesc.isBigEndian()) {
- swap = true;
- } else {
- swap = false;
- }
- }
+ swap = !machDesc.isBigEndian();
}
/** Should only swap if the hardware's underlying byte order is
diff --git a/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java b/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java
index 2a3fa0236..40dc912fd 100644
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java
@@ -29,6 +29,11 @@ import sun.jvm.hotspot.interpreter.*;
import sun.jvm.hotspot.utilities.*;
import sun.jvm.hotspot.debugger.*;
import sun.jvm.hotspot.runtime.*;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.security.AccessControlContext;
+import java.security.PrivilegedExceptionAction;
+import java.security.PrivilegedActionException;
public class ByteCodeRewriter
{
@@ -38,8 +43,20 @@ public class ByteCodeRewriter
private byte[] code;
private Bytes bytes;
- public static final boolean DEBUG = false;
private static final int jintSize = 4;
+ public static final boolean DEBUG; // blank final: assigned exactly once by the static initializer below
+
+ static { // reads the debug switch from a system property when the class is initialized
+ String debug = (String) AccessController.doPrivileged( // the property lookup runs inside a privileged action
+ new PrivilegedAction() {
+ public Object run() {
+ return System.getProperty("sun.jvm.hotspot.tools.jcore.ByteCodeRewriter.DEBUG");
+ }
+ }
+ );
+ DEBUG = (debug != null ? debug.equalsIgnoreCase("true") : false); // unset property => false; "true" in any letter case => true
+ }
+
protected void debugMessage(String message) {
System.out.println(message);
@@ -54,6 +71,18 @@ public class ByteCodeRewriter
}
+ protected short getConstantPoolIndexFromRefMap(int rawcode, int bci) { // reads the operand at bci and maps it through cpool.objectToCPIndex
+ int refIndex;
+ String fmt = Bytecodes.format(rawcode); // the format string's length decides how wide the operand is
+ switch (fmt.length()) {
+ case 2: refIndex = 0xFF & method.getBytecodeByteArg(bci); break; // one operand byte, masked to an unsigned value
+ case 3: refIndex = 0xFFFF & bytes.swapShort(method.getBytecodeShortArg(bci)); break; // 16-bit operand, passed through swapShort and masked to unsigned
+ default: throw new IllegalArgumentException(); // any other format length is rejected
+ }
+
+ return (short)cpool.objectToCPIndex(refIndex); // result is narrowed to short for the caller
+ }
+
protected short getConstantPoolIndex(int rawcode, int bci) {
// get ConstantPool index from ConstantPoolCacheIndex at given bci
String fmt = Bytecodes.format(rawcode);
@@ -95,6 +124,12 @@ public class ByteCodeRewriter
int hotspotcode = Bytecodes._illegal;
int len = 0;
+ if (DEBUG) {
+ String msg = method.getMethodHolder().getName().asString() + "." +
+ method.getName().asString() +
+ method.getSignature().asString();
+ debugMessage(msg);
+ }
for (int bci = 0; bci < code.length;) {
hotspotcode = Bytecodes.codeAt(method, bci);
bytecode = Bytecodes.javaCode(hotspotcode);
@@ -133,15 +168,15 @@ public class ByteCodeRewriter
case Bytecodes._ldc_w:
if (hotspotcode != bytecode) {
- // fast_aldc_w puts constant in CP cache
- cpoolIndex = getConstantPoolIndex(hotspotcode, bci + 1);
+ // fast_aldc_w puts constant in reference map
+ cpoolIndex = getConstantPoolIndexFromRefMap(hotspotcode, bci + 1);
writeShort(code, bci + 1, cpoolIndex);
}
break;
case Bytecodes._ldc:
if (hotspotcode != bytecode) {
- // fast_aldc puts constant in CP cache
- cpoolIndex = getConstantPoolIndex(hotspotcode, bci + 1);
+ // fast_aldc puts constant in reference map
+ cpoolIndex = getConstantPoolIndexFromRefMap(hotspotcode, bci + 1);
code[bci + 1] = (byte)(cpoolIndex);
}
break;
diff --git a/make/excludeSrc.make b/make/excludeSrc.make
index 00a1e3a70..721aea5c7 100644
--- a/make/excludeSrc.make
+++ b/make/excludeSrc.make
@@ -79,10 +79,10 @@ ifeq ($(INCLUDE_ALTERNATE_GCS), false)
CXXFLAGS += -DSERIALGC
CFLAGS += -DSERIALGC
Src_Files_EXCLUDE += \
- binaryTreeDictionary.cpp cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \
+ cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \
cmsGCAdaptivePolicyCounters.cpp cmsLockVerifier.cpp cmsPermGen.cpp compactibleFreeListSpace.cpp \
- concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp freeBlockDictionary.cpp \
- freeChunk.cpp freeList.cpp promotionInfo.cpp vmCMSOperations.cpp collectionSetChooser.cpp \
+ concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp \
+ freeChunk.cpp adaptiveFreeList.cpp promotionInfo.cpp vmCMSOperations.cpp collectionSetChooser.cpp \
concurrentG1Refine.cpp concurrentG1RefineThread.cpp concurrentMark.cpp concurrentMarkThread.cpp \
dirtyCardQueue.cpp g1AllocRegion.cpp g1BlockOffsetTable.cpp g1CollectedHeap.cpp g1GCPhaseTimes.cpp \
g1CollectorPolicy.cpp g1ErgoVerbose.cpp g1_globals.cpp g1HRPrinter.cpp g1MarkSweep.cpp \
diff --git a/make/hotspot_version b/make/hotspot_version
index 14577a128..1f72e227a 100644
--- a/make/hotspot_version
+++ b/make/hotspot_version
@@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2012
HS_MAJOR_VER=25
HS_MINOR_VER=0
-HS_BUILD_NUMBER=06
+HS_BUILD_NUMBER=07
JDK_MAJOR_VER=1
JDK_MINOR_VER=8
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index 378c1f8c6..6b9677d30 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -1007,6 +1007,67 @@ void Assembler::addss(XMMRegister dst, Address src) {
emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}
+void Assembler::aesdec(XMMRegister dst, Address src) { // emits AESDEC with a memory source operand
+ assert(VM_Version::supports_aes(), ""); // only valid when the CPU reports AES support
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // 66 prefix, 0F 38 opcode map; dst is passed as both destination and first source
+ emit_byte(0xde); // opcode byte for AESDEC
+ emit_operand(dst, src); // encodes the register/memory operand pair
+}
+
+void Assembler::aesdec(XMMRegister dst, XMMRegister src) { // emits AESDEC, register-register form
+ assert(VM_Version::supports_aes(), ""); // only valid when the CPU reports AES support
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // emits the prefix; the result supplies the register bits used below
+ emit_byte(0xde); // opcode byte for AESDEC
+ emit_byte(0xC0 | encode); // ModRM byte with mod=11 (register-direct) plus the register bits
+}
+
+void Assembler::aesdeclast(XMMRegister dst, Address src) { // emits AESDECLAST with a memory source operand
+ assert(VM_Version::supports_aes(), ""); // only valid when the CPU reports AES support
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // 66 prefix, 0F 38 opcode map; dst is passed as both destination and first source
+ emit_byte(0xdf); // opcode byte for AESDECLAST
+ emit_operand(dst, src); // encodes the register/memory operand pair
+}
+
+void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) { // emits AESDECLAST, register-register form
+ assert(VM_Version::supports_aes(), ""); // only valid when the CPU reports AES support
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // emits the prefix; the result supplies the register bits used below
+ emit_byte(0xdf); // opcode byte for AESDECLAST
+ emit_byte(0xC0 | encode); // ModRM byte with mod=11 (register-direct) plus the register bits
+}
+
+void Assembler::aesenc(XMMRegister dst, Address src) { // emits AESENC with a memory source operand
+ assert(VM_Version::supports_aes(), ""); // only valid when the CPU reports AES support
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // 66 prefix, 0F 38 opcode map; dst is passed as both destination and first source
+ emit_byte(0xdc); // opcode byte for AESENC
+ emit_operand(dst, src); // encodes the register/memory operand pair
+}
+
+void Assembler::aesenc(XMMRegister dst, XMMRegister src) { // emits AESENC, register-register form
+ assert(VM_Version::supports_aes(), ""); // only valid when the CPU reports AES support
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // emits the prefix; the result supplies the register bits used below
+ emit_byte(0xdc); // opcode byte for AESENC
+ emit_byte(0xC0 | encode); // ModRM byte with mod=11 (register-direct) plus the register bits
+}
+
+void Assembler::aesenclast(XMMRegister dst, Address src) { // emits AESENCLAST with a memory source operand
+ assert(VM_Version::supports_aes(), ""); // only valid when the CPU reports AES support
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // 66 prefix, 0F 38 opcode map; dst is passed as both destination and first source
+ emit_byte(0xdd); // opcode byte for AESENCLAST
+ emit_operand(dst, src); // encodes the register/memory operand pair
+}
+
+void Assembler::aesenclast(XMMRegister dst, XMMRegister src) { // emits AESENCLAST, register-register form
+ assert(VM_Version::supports_aes(), ""); // only valid when the CPU reports AES support
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // emits the prefix; the result supplies the register bits used below
+ emit_byte(0xdd); // opcode byte for AESENCLAST
+ emit_byte(0xC0 | encode); // ModRM byte with mod=11 (register-direct) plus the register bits
+}
+
+
void Assembler::andl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
@@ -2307,6 +2368,22 @@ void Assembler::prefix(Prefix p) {
a_byte(p);
}
+void Assembler::pshufb(XMMRegister dst, XMMRegister src) { // emits PSHUFB (shuffle bytes), register-register form
+ assert(VM_Version::supports_ssse3(), ""); // only valid when the CPU reports SSSE3 support
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // emits the prefix; the result supplies the register bits used below
+ emit_byte(0x00); // opcode byte for PSHUFB
+ emit_byte(0xC0 | encode); // ModRM byte with mod=11 (register-direct) plus the register bits
+}
+
+void Assembler::pshufb(XMMRegister dst, Address src) { // emits PSHUFB (shuffle bytes) with the mask read from memory
+ assert(VM_Version::supports_ssse3(), ""); // only valid when the CPU reports SSSE3 support
+ assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); // memory form is accepted here only with AVX enabled, since alignment of src is not checked
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // 66 prefix, 0F 38 opcode map; dst is passed as both destination and first source
+ emit_byte(0x00); // opcode byte for PSHUFB
+ emit_operand(dst, src); // encodes the register/memory operand pair
+}
+
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -8067,6 +8144,15 @@ void MacroAssembler::movptr(Address dst, Register src) {
LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
+void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { // unaligned 128-bit load from an address literal
+ if (reachable(src)) { // literal can be addressed directly
+ Assembler::movdqu(dst, as_Address(src));
+ } else { // otherwise materialize the address first
+ lea(rscratch1, src); // note: overwrites rscratch1
+ Assembler::movdqu(dst, Address(rscratch1, 0));
+ }
+}
+
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::movsd(dst, as_Address(src));
@@ -8357,6 +8443,17 @@ void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
}
}
+void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { // PSHUFB with the shuffle mask taken from an address literal
+ // Used in sign-bit flipping with aligned address. NOTE(review): wording looks copied from another wrapper; confirm it applies to pshufb.
+ assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); // without AVX the literal's target must be 16-byte aligned
+ if (reachable(src)) { // literal can be addressed directly
+ Assembler::pshufb(dst, as_Address(src));
+ } else { // otherwise materialize the address first
+ lea(rscratch1, src); // note: overwrites rscratch1
+ Assembler::pshufb(dst, Address(rscratch1, 0));
+ }
+}
+
// AVX 3-operands instructions
void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
index c936e13f5..8a9bbaf42 100644
--- a/src/cpu/x86/vm/assembler_x86.hpp
+++ b/src/cpu/x86/vm/assembler_x86.hpp
@@ -875,6 +875,17 @@ private:
void addss(XMMRegister dst, Address src);
void addss(XMMRegister dst, XMMRegister src);
+ // AES instructions
+ void aesdec(XMMRegister dst, Address src);
+ void aesdec(XMMRegister dst, XMMRegister src);
+ void aesdeclast(XMMRegister dst, Address src);
+ void aesdeclast(XMMRegister dst, XMMRegister src);
+ void aesenc(XMMRegister dst, Address src);
+ void aesenc(XMMRegister dst, XMMRegister src);
+ void aesenclast(XMMRegister dst, Address src);
+ void aesenclast(XMMRegister dst, XMMRegister src);
+
+
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
@@ -1424,6 +1435,10 @@ private:
void prefetcht2(Address src);
void prefetchw(Address src);
+ // Shuffle Bytes
+ void pshufb(XMMRegister dst, XMMRegister src);
+ void pshufb(XMMRegister dst, Address src);
+
// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
void pshufd(XMMRegister dst, Address src, int mode);
@@ -2611,6 +2626,12 @@ public:
void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
void divss(XMMRegister dst, AddressLiteral src);
+ // Move Unaligned Double Quadword
+ void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); }
+ void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); }
+ void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); }
+ void movdqu(XMMRegister dst, AddressLiteral src);
+
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
@@ -2658,6 +2679,10 @@ public:
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
void xorps(XMMRegister dst, AddressLiteral src);
+ // Shuffle Bytes
+ void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
+ void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); }
+ void pshufb(XMMRegister dst, AddressLiteral src);
// AVX 3-operands instructions
void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index f149fde83..d8b61e0b2 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -2137,6 +2137,529 @@ class StubGenerator: public StubCodeGenerator {
}
}
+ // AES intrinsic stubs
+ enum {AESBlockSize = 16};
+
+ address generate_key_shuffle_mask() { // emits the constant mask used with pshufb by load_key and returns its address
+ __ align(16); // 16-byte alignment for the emitted data
+ StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
+ address start = __ pc(); // address of the first mask word
+ __ emit_data(0x00010203, relocInfo::none, 0 ); // four data words in total; presumably reverses the bytes of each 32-bit lane when used as a pshufb mask - TODO confirm
+ __ emit_data(0x04050607, relocInfo::none, 0 );
+ __ emit_data(0x08090a0b, relocInfo::none, 0 );
+ __ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
+ return start;
+ }
+
+ // Loads the 128-bit key word at key+offset into xmmdst (unaligned load) and byte-shuffles it with the key shuffle mask.
+ // If xmm_shuf_mask is given, it must already hold the mask; otherwise the mask is read from key_shuffle_mask_addr().
+ void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+ __ movdqu(xmmdst, Address(key, offset)); // raw 16 bytes of the key schedule
+ if (xmm_shuf_mask != NULL) { // caller preloaded the mask into a register
+ __ pshufb(xmmdst, xmm_shuf_mask);
+ } else { // no register supplied: use the mask stored in the stub area
+ __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ }
+ }
+
+ // Applies one aesenc to xmmdst using the key word at key+offset; xmmtmp is overwritten with that (shuffled) key word.
+ // xmm_shuf_mask is forwarded to load_key: pass a register that already holds the shuffle mask, or omit it.
+ void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+ load_key(xmmtmp, key, offset, xmm_shuf_mask); // fetch and byte-shuffle the round key
+ __ aesenc(xmmdst, xmmtmp); // xmmdst is updated in place
+ }
+
+ // Applies one aesdec to xmmdst using the key word at key+offset; xmmtmp is overwritten with that (shuffled) key word.
+ // xmm_shuf_mask is forwarded to load_key: pass a register that already holds the shuffle mask, or omit it.
+ void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+ load_key(xmmtmp, key, offset, xmm_shuf_mask); // fetch and byte-shuffle the round key
+ __ aesdec(xmmdst, xmmtmp); // xmmdst is updated in place
+ }
+
+
+ // Generates the stub that AES-encrypts a single 16-byte block.
+ //
+ // Inputs (this 32-bit stub reads them from the stack after enter()):
+ // [rbp+8] - source byte array address
+ // [rbp+12] - destination byte array address
+ // [rbp+16] - K (key) in little endian int array
+ // Returns the stub entry address; the generated code itself returns 0 in rax.
+ address generate_aescrypt_encryptBlock() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+ Label L_doLast;
+ address start = __ pc();
+
+ const Register from = rsi; // source array address
+ const Register to = rdx; // destination array address
+ const Register key = rcx; // key array address
+ const Register keylen = rax;
+ const Address from_param(rbp, 8+0);
+ const Address to_param (rbp, 8+4);
+ const Address key_param (rbp, 8+8);
+
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ const XMMRegister xmm_key_shuf_mask = xmm2;
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ __ push(rsi); // rsi is used as 'from' below; restored by the pop before returning
+ __ movptr(from , from_param);
+ __ movptr(to , to_param);
+ __ movptr(key , key_param);
+
+ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // key points at the int data; step back to the array length field
+ // keylen = # of 32-bit words, convert to 128-bit words
+ __ shrl(keylen, 2);
+ __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); // keep the shuffle mask in a register for every load_key below
+ __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
+
+ // For encryption, the java expanded key ordering is just what we need
+
+ load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+ __ pxor(xmm_result, xmm_temp); // initial xor with the key word at offset 0x00
+ for (int offset = 0x10; offset <= 0x90; offset += 0x10) { // nine aesenc rounds, emitted unrolled at stub-generation time
+ aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+ }
+ load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
+ __ cmpl(keylen, 0); // zero extra key words: the word at 0xa0 is the last-round key
+ __ jcc(Assembler::equal, L_doLast);
+ __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
+ aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+ load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+ __ subl(keylen, 2); // exactly two extra key words: the word at 0xc0 is the last-round key
+ __ jcc(Assembler::equal, L_doLast);
+ __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
+ aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+ load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+ __ BIND(L_doLast); // xmm_temp holds the final round key on every path reaching here
+ __ aesenclast(xmm_result, xmm_temp);
+ __ movdqu(Address(to, 0), xmm_result); // store the result
+ __ xorptr(rax, rax); // return 0
+ __ pop(rsi);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
+
+
+ // Generates the stub that AES-decrypts a single 16-byte block.
+ //
+ // Inputs (this 32-bit stub reads them from the stack after enter()):
+ // [rbp+8] - source byte array address
+ // [rbp+12] - destination byte array address
+ // [rbp+16] - K (key) in little endian int array
+ // Returns the stub entry address; the generated code itself returns 0 in rax.
+ address generate_aescrypt_decryptBlock() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+ Label L_doLast;
+ address start = __ pc();
+
+ const Register from = rsi; // source array address
+ const Register to = rdx; // destination array address
+ const Register key = rcx; // key array address
+ const Register keylen = rax;
+ const Address from_param(rbp, 8+0);
+ const Address to_param (rbp, 8+4);
+ const Address key_param (rbp, 8+8);
+
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ const XMMRegister xmm_key_shuf_mask = xmm2;
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ __ push(rsi); // rsi is used as 'from' below; restored by the pop before returning
+ __ movptr(from , from_param);
+ __ movptr(to , to_param);
+ __ movptr(key , key_param);
+
+ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // key points at the int data; step back to the array length field
+ // keylen = # of 32-bit words, convert to 128-bit words
+ __ shrl(keylen, 2);
+ __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); // keep the shuffle mask in a register for every load_key below
+ __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
+
+ // for decryption java expanded key ordering is rotated one position from what we want
+ // so we start from 0x10 here and hit 0x00 last
+ // we don't know if the key is aligned, hence not using load-execute form
+ load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
+ __ pxor (xmm_result, xmm_temp); // initial xor with the key word at offset 0x10
+ for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { // nine aesdec rounds, emitted unrolled at stub-generation time
+ aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+ }
+ __ cmpl(keylen, 0); // zero extra key words: skip straight to the last round
+ __ jcc(Assembler::equal, L_doLast);
+ // only in 192 and 256 bit keys
+ aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+ __ subl(keylen, 2); // exactly two extra key words: skip to the last round
+ __ jcc(Assembler::equal, L_doLast);
+ // only in 256 bit keys
+ aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+ __ BIND(L_doLast);
+ // for decryption the aesdeclast operation is always on key+0x00
+ load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+ __ aesdeclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, 0), xmm_result); // store the result
+
+ __ xorptr(rax, rax); // return 0
+ __ pop(rsi);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
+
+ void handleSOERegisters(bool saving) { // saving=true: reserve stack space and spill rbx/rsi/rdi; saving=false: reload them
+ const int saveFrameSizeInBytes = 4 * wordSize; // four words are reserved although only three slots are written below
+ const Address saved_rbx (rbp, -3 * wordSize); // slots are addressed relative to rbp, so they are valid only after enter()
+ const Address saved_rsi (rbp, -2 * wordSize);
+ const Address saved_rdi (rbp, -1 * wordSize);
+
+ if (saving) {
+ __ subptr(rsp, saveFrameSizeInBytes); // make room below rbp for the save slots
+ __ movptr(saved_rsi, rsi);
+ __ movptr(saved_rdi, rdi);
+ __ movptr(saved_rbx, rbx);
+ } else {
+ // restoring (rsp is not adjusted here; the visible caller follows with leave())
+ __ movptr(rsi, saved_rsi);
+ __ movptr(rdi, saved_rdi);
+ __ movptr(rbx, saved_rbx);
+ }
+ }
+
+ // Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // c_rarg3 - r vector byte array address
+ // c_rarg4 - input length
+ //
+ address generate_cipherBlockChaining_encryptAESCrypt() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+ address start = __ pc();
+
+ Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
+ const Register from = rsi; // source array address
+ const Register to = rdx; // destination array address
+ const Register key = rcx; // key array address
+ const Register rvec = rdi; // r byte array initialized from initvector array address
+ // and left with the results of the last encryption block
+ const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
+ const Register pos = rax;
+
+ // xmm register assignments for the loops below
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ // first 6 keys preloaded into xmm2-xmm7
+ const int XMM_REG_NUM_KEY_FIRST = 2;
+ const int XMM_REG_NUM_KEY_LAST = 7;
+ const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ handleSOERegisters(true /*saving*/);
+
+ // load registers from incoming parameters
+ const Address from_param(rbp, 8+0);
+ const Address to_param (rbp, 8+4);
+ const Address key_param (rbp, 8+8);
+ const Address rvec_param (rbp, 8+12);
+ const Address len_param (rbp, 8+16);
+ __ movptr(from , from_param);
+ __ movptr(to , to_param);
+ __ movptr(key , key_param);
+ __ movptr(rvec , rvec_param);
+ __ movptr(len_reg , len_param);
+
+ const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ // load up xmm regs 2 thru 7 with keys 0-5
+ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+ offset += 0x10;
+ }
+
+ __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
+
+ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rax, 44);
+ __ jcc(Assembler::notEqual, L_key_192_256);
+
+ // 128 bit code follows here
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_128);
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
+ aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+ }
+ load_key(xmm_temp, key, 0xa0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_128);
+
+ __ BIND(L_exit);
+ __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
+
+ handleSOERegisters(false /*restoring*/);
+ __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ __ BIND(L_key_192_256);
+ // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+ __ cmpl(rax, 52);
+ __ jcc(Assembler::notEqual, L_key_256);
+
+ // 192-bit code follows here (could be changed to use more xmm registers)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_192);
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
+ aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+ }
+ load_key(xmm_temp, key, 0xc0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_192);
+ __ jmp(L_exit);
+
+ __ BIND(L_key_256);
+ // 256-bit code follows here (could be changed to use more xmm registers)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_256);
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
+ aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+ }
+ load_key(xmm_temp, key, 0xe0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_256);
+ __ jmp(L_exit);
+
+ return start;
+ }
+
+
+ // CBC AES Decryption: emits a stub that decrypts the input one 16-byte block per loop iteration.
+ // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
+ //
+ // Arguments (read below from the stack at rbp+8 .. rbp+24 into the registers shown):
+ //
+ // Inputs:
+ // arg0 (rsi) - source byte array address
+ // arg1 (rdx) - destination byte array address
+ // arg2 (rcx) - K (key) in little endian int array
+ // arg3 (rdi) - r vector byte array address
+ // arg4 (rbx) - input length
+ //
+ // Returns the entry address of the emitted stub; the stub itself returns 0 in rax.
+ address generate_cipherBlockChaining_decryptAESCrypt() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+ address start = __ pc(); // entry point of the stub, returned at the end of this generator
+
+ Label L_exit, L_key_192_256, L_key_256;
+ Label L_singleBlock_loopTop_128;
+ Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
+ const Register from = rsi; // source array address
+ const Register to = rdx; // destination array address
+ const Register key = rcx; // key array address
+ const Register rvec = rdi; // r byte array initialized from initvector array address
+ // and left holding the last cipher block read from the source (see L_exit)
+ const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
+ const Register pos = rax; // byte offset of the current block within from/to
+
+ // xmm register assignments for the loops below
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ // first 6 keys preloaded into xmm2-xmm7
+ const int XMM_REG_NUM_KEY_FIRST = 2;
+ const int XMM_REG_NUM_KEY_LAST = 7;
+ const int FIRST_NON_REG_KEY_offset = 0x70; // first key offset not preloaded into a register; loaded from memory inside the loops
+ const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ handleSOERegisters(true /*saving*/);
+
+ // load registers from incoming parameters
+ const Address from_param(rbp, 8+0);
+ const Address to_param (rbp, 8+4);
+ const Address key_param (rbp, 8+8);
+ const Address rvec_param (rbp, 8+12);
+ const Address len_param (rbp, 8+16);
+ __ movptr(from , from_param);
+ __ movptr(to , to_param);
+ __ movptr(key , key_param);
+ __ movptr(rvec , rvec_param);
+ __ movptr(len_reg , len_param);
+
+ // the java expanded key ordering is rotated one position from what we want
+ // so we start from 0x10 here and hit 0x00 last
+ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front (same register as xmm_temp)
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ // load up xmm regs 2 thru 7 with the 6 keys at offsets 0x10 thru 0x60
+ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+ offset += 0x10;
+ }
+
+ // inside here, use the rvec register to point to previous block cipher
+ // with which we xor at the end of each newly decrypted block
+ const Register prev_block_cipher_ptr = rvec;
+
+ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rax, 44); // a length of 44 ints selects the 128-bit path that follows
+ __ jcc(Assembler::notEqual, L_key_192_256);
+
+
+ // 128-bit code follows here (one block per iteration; this 32-bit stub is not parallelized)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_128);
+ __ cmpptr(len_reg, 0); // any blocks left??
+ __ jcc(Assembler::equal, L_exit);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0
+ aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+ }
+ load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
+ __ aesdeclast(xmm_result, xmm_temp);
+ __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jmp(L_singleBlock_loopTop_128);
+
+
+ __ BIND(L_exit); // common exit, also reached from the 192-bit and 256-bit loops
+ __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+ __ movptr(rvec , rvec_param); // restore this since used in loop
+ __ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object
+ handleSOERegisters(false /*restoring*/);
+ __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+
+ __ BIND(L_key_192_256);
+ // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+ __ cmpl(rax, 52);
+ __ jcc(Assembler::notEqual, L_key_256);
+
+ // 192-bit code follows here (could be optimized to use parallelism)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_192);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0
+ aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+ }
+ load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
+ __ aesdeclast(xmm_result, xmm_temp);
+ __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); // NOTE(review): len is tested only after a block is processed, unlike the 128-bit loop; presumably callers never pass len == 0 - confirm
+ __ jmp(L_exit);
+
+ __ BIND(L_key_256);
+ // 256-bit code follows here (could be optimized to use parallelism)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_256);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
+ aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+ }
+ load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
+ __ aesdeclast(xmm_result, xmm_temp);
+ __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); // NOTE(review): same post-block len test as the 192-bit loop - confirm len == 0 cannot reach here
+ __ jmp(L_exit);
+
+ return start;
+ }
+
+
public:
// Information about frame layout at time of blocking runtime call.
// Note that we only have to preserve callee-saved registers since
@@ -2332,6 +2855,16 @@ class StubGenerator: public StubCodeGenerator {
generate_arraycopy_stubs();
generate_math_stubs();
+
+ // don't bother generating these AES intrinsic stubs unless global flag is set
+ if (UseAESIntrinsics) {
+ StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
+
+ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+ }
}
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index 8ae595a56..3e223387c 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -2941,6 +2941,548 @@ class StubGenerator: public StubCodeGenerator {
}
}
+ // AES intrinsic stubs
+ enum {AESBlockSize = 16}; // size in bytes of one AES block; the CBC loops advance pos and shrink len by this amount
+
+ // Emits the 16-byte constant used as the pshufb mask by load_key() and returns its address.
+ // Read as bytes in memory order the mask is 03 02 01 00 | 07 06 05 04 | 0b 0a 09 08 | 0f 0e 0d 0c,
+ // i.e. it reverses the bytes inside each 32-bit word of a 128-bit value.
+ address generate_key_shuffle_mask() {
+ __ align(16);
+ StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
+ const address mask_addr = __ pc();
+ // low quadword first, then the high quadword
+ __ emit_data64( 0x0405060700010203, relocInfo::none );
+ __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none );
+ return mask_addr;
+ }
+
+ // Emits code that loads the 16 bytes at key+offset into xmmdst (unaligned load) and then
+ // shuffles them with the key shuffle mask. The mask comes from xmm_shuf_mask when the caller
+ // supplies one, otherwise straight from the mask constant's external address.
+ void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+ const Address key_word(key, offset);
+ __ movdqu(xmmdst, key_word);
+ if (xmm_shuf_mask == NULL) {
+ // no register copy of the mask was supplied: use the in-memory mask
+ __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ return;
+ }
+ __ pshufb(xmmdst, xmm_shuf_mask);
+ }
+
+ // Emits one aesenc round on xmmdst using the key word at key+offset; xmmtmp is overwritten with that (shuffled) key word.
+ // can optionally specify that the shuffle mask is already in an xmmregister
+ void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+ load_key(xmmtmp, key, offset, xmm_shuf_mask); // xmmtmp <- shuffled key word from key+offset
+ __ aesenc(xmmdst, xmmtmp); // apply it to xmmdst as one encryption round
+ }
+
+ // Emits one aesdec round on xmmdst using the key word at key+offset; xmmtmp is overwritten with that (shuffled) key word.
+ // can optionally specify that the shuffle mask is already in an xmmregister
+ void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+ load_key(xmmtmp, key, offset, xmm_shuf_mask); // xmmtmp <- shuffled key word from key+offset
+ __ aesdec(xmmdst, xmmtmp); // apply it to xmmdst as one decryption round
+ }
+
+
+ // Emits a stub that AES-encrypts a single 16-byte block. Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // Returns the entry address of the emitted stub; the stub itself returns 0 in rax.
+ address generate_aescrypt_encryptBlock() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+ Label L_doLast;
+ address start = __ pc();
+
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register keylen = rax;
+
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ const XMMRegister xmm_key_shuf_mask = xmm2;
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ // keylen = # of 32-bit words, convert to 128-bit words
+ __ shrl(keylen, 2);
+ __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more; keylen is now the count of extra words (0 when there are exactly 11)
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
+
+ // For encryption, the java expanded key ordering is just what we need
+ // we don't know if the key is aligned, hence not using load-execute form
+
+ load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+ __ pxor(xmm_result, xmm_temp);
+ for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
+ aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+ }
+ load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); // becomes the aesenclast key if there are no extra key words
+ __ cmpl(keylen, 0);
+ __ jcc(Assembler::equal, L_doLast);
+ __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
+ aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+ load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+ __ subl(keylen, 2); // sets ZF when exactly two extra key words were present
+ __ jcc(Assembler::equal, L_doLast);
+ __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
+ aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+ load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+ __ BIND(L_doLast); // xmm_temp holds the final key word for whichever path got here
+ __ aesenclast(xmm_result, xmm_temp);
+ __ movdqu(Address(to, 0), xmm_result); // store the result
+ __ xorptr(rax, rax); // return 0
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
+
+
+ // Emits a stub that AES-decrypts a single 16-byte block. Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // Returns the entry address of the emitted stub; the stub itself returns 0 in rax.
+ address generate_aescrypt_decryptBlock() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+ Label L_doLast;
+ address start = __ pc();
+
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register keylen = rax;
+
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ const XMMRegister xmm_key_shuf_mask = xmm2;
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ // keylen = # of 32-bit words, convert to 128-bit words
+ __ shrl(keylen, 2);
+ __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more; keylen is now the count of extra words (0 when there are exactly 11)
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
+
+ // for decryption java expanded key ordering is rotated one position from what we want
+ // so we start from 0x10 here and hit 0x00 last
+ // we don't know if the key is aligned, hence not using load-execute form
+ load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
+ __ pxor (xmm_result, xmm_temp);
+ for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
+ aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+ }
+ __ cmpl(keylen, 0);
+ __ jcc(Assembler::equal, L_doLast);
+ // only in 192 and 256 bit keys
+ aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+ __ subl(keylen, 2); // sets ZF when exactly two extra key words were present
+ __ jcc(Assembler::equal, L_doLast);
+ // only in 256 bit keys
+ aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+ __ BIND(L_doLast);
+ // for decryption the aesdeclast operation is always on key+0x00
+ load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+ __ aesdeclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, 0), xmm_result); // store the result
+
+ __ xorptr(rax, rax); // return 0
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
+
+
+ // Emits the CBC-mode AES encryption stub: one 16-byte block per iteration, chained through xmm_result. Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // c_rarg3 - r vector byte array address
+ // c_rarg4 - input length
+ // Returns the entry address of the emitted stub; the stub itself returns 0 in rax.
+ address generate_cipherBlockChaining_encryptAESCrypt() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+ address start = __ pc();
+
+ Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register rvec = c_rarg3; // r byte array initialized from initvector array address
+ // and left with the results of the last encryption block
+#ifndef _WIN64
+ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
+#else
+ const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
+ const Register len_reg = r10; // pick the first volatile windows register
+#endif
+ const Register pos = rax; // byte offset of the current block within from/to
+
+ // xmm register assignments for the loops below
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ // keys 0-10 preloaded into xmm2-xmm12
+ const int XMM_REG_NUM_KEY_FIRST = 2;
+ const int XMM_REG_NUM_KEY_LAST = 12;
+ const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+ const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+ // on win64, fill len_reg from stack position
+ __ movl(len_reg, len_mem);
+ // save the xmm registers which must be preserved 6-12
+ __ subptr(rsp, -rsp_after_call_off * wordSize); // rsp_after_call_off and xmm_save() are defined outside this function
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
+ }
+#endif
+
+ const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ // load up xmm regs 2 thru 12 with key 0x00 - 0xa0
+ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+ offset += 0x10;
+ }
+
+ __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
+
+ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rax, 44); // a length of 44 ints selects the 128-bit path that follows
+ __ jcc(Assembler::notEqual, L_key_192_256);
+
+ // 128 bit code follows here
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_128);
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ __ aesenclast(xmm_result, xmm_key10);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_128); // loop until len reaches exactly 0; len is not tested before the first block
+
+ __ BIND(L_exit); // common exit, also reached from the 192-bit and 256-bit loops
+ __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
+
+#ifdef _WIN64
+ // restore xmm regs belonging to calling function
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(as_XMMRegister(i), xmm_save(i));
+ }
+#endif
+ __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ __ BIND(L_key_192_256);
+ // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+ __ cmpl(rax, 52);
+ __ jcc(Assembler::notEqual, L_key_256);
+
+ // 192-bit code follows here (could be changed to use more xmm registers)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_192);
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ aes_enc_key(xmm_result, xmm_temp, key, 0xb0); // keys past 0xa0 are not in registers; loaded from memory each iteration
+ load_key(xmm_temp, key, 0xc0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_192);
+ __ jmp(L_exit);
+
+ __ BIND(L_key_256);
+ // 256-bit code follows here (could be changed to use more xmm registers)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_256);
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ aes_enc_key(xmm_result, xmm_temp, key, 0xb0); // keys past 0xa0 are not in registers; loaded from memory each iteration
+ aes_enc_key(xmm_result, xmm_temp, key, 0xc0);
+ aes_enc_key(xmm_result, xmm_temp, key, 0xd0);
+ load_key(xmm_temp, key, 0xe0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_256);
+ __ jmp(L_exit);
+
+ return start;
+ }
+
+
+
+ // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time
+ // to hide instruction latency (only the 128-bit key path is 4-way; 192/256-bit paths do one block per iteration)
+ //
+ // Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // c_rarg3 - r vector byte array address
+ // c_rarg4 - input length
+ //
+ // Returns the entry address of the emitted stub; the stub itself returns 0 in rax.
+ address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+ address start = __ pc();
+
+ Label L_exit, L_key_192_256, L_key_256;
+ Label L_singleBlock_loopTop_128, L_multiBlock_loopTop_128;
+ Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register rvec = c_rarg3; // r byte array initialized from initvector array address
+ // and left holding the last cipher block processed (see L_exit)
+#ifndef _WIN64
+ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
+#else
+ const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
+ const Register len_reg = r10; // pick the first volatile windows register
+#endif
+ const Register pos = rax; // byte offset of the current block within from/to
+
+ // xmm register assignments for the loops below
+ const XMMRegister xmm_result = xmm0;
+ // keys preloaded into xmm5-xmm15: offsets 0x10-0xa0 in xmm5-xmm14, offset 0x00 in xmm15
+ const int XMM_REG_NUM_KEY_FIRST = 5;
+ const int XMM_REG_NUM_KEY_LAST = 15;
+ const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+ const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+ // on win64, fill len_reg from stack position
+ __ movl(len_reg, len_mem);
+ // save the xmm registers which must be preserved 6-15
+ __ subptr(rsp, -rsp_after_call_off * wordSize); // rsp_after_call_off and xmm_save() are defined outside this function
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
+ }
+#endif
+ // the java expanded key ordering is rotated one position from what we want
+ // so we start from 0x10 here and hit 0x00 last
+ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
+ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00; // the last register gets the key word at offset 0
+ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+ offset += 0x10;
+ }
+
+ const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block (reuses the shuffle-mask register)
+ // registers holding the four results in the parallelized loop
+ const XMMRegister xmm_result0 = xmm0;
+ const XMMRegister xmm_result1 = xmm2;
+ const XMMRegister xmm_result2 = xmm3;
+ const XMMRegister xmm_result3 = xmm4;
+
+ __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec
+
+ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rax, 44); // a length of 44 ints selects the 128-bit path that follows
+ __ jcc(Assembler::notEqual, L_key_192_256);
+
+
+ // 128-bit code follows here, parallelized
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_multiBlock_loopTop_128);
+ __ cmpptr(len_reg, 4*AESBlockSize); // see if at least 4 blocks left
+ __ jcc(Assembler::less, L_singleBlock_loopTop_128); // fewer than 4 blocks: finish in the single-block loop
+
+ __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0*AESBlockSize)); // get next 4 blocks into xmmresult registers
+ __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1*AESBlockSize));
+ __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2*AESBlockSize));
+ __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3*AESBlockSize));
+
+#define DoFour(opc, src_reg) \
+ __ opc(xmm_result0, src_reg); \
+ __ opc(xmm_result1, src_reg); \
+ __ opc(xmm_result2, src_reg); \
+ __ opc(xmm_result3, src_reg);
+
+ DoFour(pxor, xmm_key_first);
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ DoFour(aesdec, as_XMMRegister(rnum));
+ }
+ DoFour(aesdeclast, xmm_key_last);
+ // for each result, xor with the r vector of previous cipher block
+ __ pxor(xmm_result0, xmm_prev_block_cipher);
+ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0*AESBlockSize));
+ __ pxor(xmm_result1, xmm_prev_block_cipher);
+ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1*AESBlockSize));
+ __ pxor(xmm_result2, xmm_prev_block_cipher);
+ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2*AESBlockSize));
+ __ pxor(xmm_result3, xmm_prev_block_cipher);
+ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3*AESBlockSize)); // this will carry over to next set of blocks
+
+ __ movdqu(Address(to, pos, Address::times_1, 0*AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output
+ __ movdqu(Address(to, pos, Address::times_1, 1*AESBlockSize), xmm_result1);
+ __ movdqu(Address(to, pos, Address::times_1, 2*AESBlockSize), xmm_result2);
+ __ movdqu(Address(to, pos, Address::times_1, 3*AESBlockSize), xmm_result3);
+
+ __ addptr(pos, 4*AESBlockSize);
+ __ subptr(len_reg, 4*AESBlockSize);
+ __ jmp(L_multiBlock_loopTop_128);
+
+ // registers used in the non-parallelized loops (these reuse xmm_result1 and xmm_result2 of the 4-way loop)
+ const XMMRegister xmm_prev_block_cipher_save = xmm2;
+ const XMMRegister xmm_temp = xmm3;
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_128);
+ __ cmpptr(len_reg, 0); // any blocks left??
+ __ jcc(Assembler::equal, L_exit);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ __ aesdeclast(xmm_result, xmm_key_last);
+ __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
+
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jmp(L_singleBlock_loopTop_128);
+
+
+ __ BIND(L_exit); // common exit, also reached from the 192-bit and 256-bit loops
+ __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object
+#ifdef _WIN64
+ // restore regs belonging to calling function
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(as_XMMRegister(i), xmm_save(i));
+ }
+#endif
+ __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+
+ __ BIND(L_key_192_256);
+ // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+ __ cmpl(rax, 52);
+ __ jcc(Assembler::notEqual, L_key_256);
+
+ // 192-bit code follows here (could be optimized to use parallelism)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_192);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 192-bit key goes up to c0
+ aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
+ __ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0
+ __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
+
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); // NOTE(review): len is tested only after a block is processed, unlike the 128-bit loops; presumably callers never pass len == 0 - confirm
+ __ jmp(L_exit);
+
+ __ BIND(L_key_256);
+ // 256-bit code follows here (could be optimized to use parallelism)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_256);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 256-bit key goes up to e0
+ aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xd0);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xe0);
+ __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0
+ __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
+
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); // NOTE(review): same post-block len test as the 192-bit loop - confirm len == 0 cannot reach here
+ __ jmp(L_exit);
+
+ return start;
+ }
+
+
+
#undef __
#define __ masm->
@@ -3135,6 +3677,16 @@ class StubGenerator: public StubCodeGenerator {
generate_arraycopy_stubs();
generate_math_stubs();
+
+ // don't bother generating these AES intrinsic stubs unless global flag is set
+ if (UseAESIntrinsics) {
+ StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
+
+ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
+ }
}
public:
diff --git a/src/cpu/x86/vm/stubRoutines_x86_32.cpp b/src/cpu/x86/vm/stubRoutines_x86_32.cpp
index 6ec4121b9..cfd4f33a6 100644
--- a/src/cpu/x86/vm/stubRoutines_x86_32.cpp
+++ b/src/cpu/x86/vm/stubRoutines_x86_32.cpp
@@ -44,3 +44,4 @@
address StubRoutines::x86::_verify_mxcsr_entry = NULL;
address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
diff --git a/src/cpu/x86/vm/stubRoutines_x86_32.hpp b/src/cpu/x86/vm/stubRoutines_x86_32.hpp
index 64767c8ad..d53124fc6 100644
--- a/src/cpu/x86/vm/stubRoutines_x86_32.hpp
+++ b/src/cpu/x86/vm/stubRoutines_x86_32.hpp
@@ -41,10 +41,14 @@ class x86 {
private:
static address _verify_mxcsr_entry;
static address _verify_fpu_cntrl_wrd_entry;
+ // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+ static address _key_shuffle_mask_addr;
public:
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address verify_fpu_cntrl_wrd_entry() { return _verify_fpu_cntrl_wrd_entry; }
+ static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
+
};
static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; }
diff --git a/src/cpu/x86/vm/stubRoutines_x86_64.cpp b/src/cpu/x86/vm/stubRoutines_x86_64.cpp
index 084bbf8fb..cf8ec5d7b 100644
--- a/src/cpu/x86/vm/stubRoutines_x86_64.cpp
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.cpp
@@ -56,3 +56,4 @@ address StubRoutines::x86::_float_sign_flip = NULL;
address StubRoutines::x86::_double_sign_mask = NULL;
address StubRoutines::x86::_double_sign_flip = NULL;
address StubRoutines::x86::_mxcsr_std = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
diff --git a/src/cpu/x86/vm/stubRoutines_x86_64.hpp b/src/cpu/x86/vm/stubRoutines_x86_64.hpp
index 9b9cede4f..c3efeecb7 100644
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp
@@ -54,6 +54,8 @@ class x86 {
static address _double_sign_mask;
static address _double_sign_flip;
static address _mxcsr_std;
+ // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+ static address _key_shuffle_mask_addr;
public:
@@ -116,6 +118,9 @@ class x86 {
{
return _mxcsr_std;
}
+
+ static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
+
};
#endif // CPU_X86_VM_STUBROUTINES_X86_64_HPP
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp
index bf7b3c213..182b0ab1a 100644
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -419,13 +419,16 @@ void VM_Version::get_processor_features() {
if (UseAVX < 1)
_cpuFeatures &= ~CPU_AVX;
+ if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
+ _cpuFeatures &= ~CPU_AES;
+
if (logical_processors_per_package() == 1) {
// HT processor could be installed on a system which doesn't support HT.
_cpuFeatures &= ~CPU_HT;
}
char buf[256];
- jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
@@ -441,6 +444,7 @@ void VM_Version::get_processor_features() {
(supports_popcnt() ? ", popcnt" : ""),
(supports_avx() ? ", avx" : ""),
(supports_avx2() ? ", avx2" : ""),
+ (supports_aes() ? ", aes" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
(supports_lzcnt() ? ", lzcnt": ""),
@@ -472,6 +476,29 @@ void VM_Version::get_processor_features() {
if (!supports_avx ()) // Drop to 0 if no AVX support
UseAVX = 0;
+ // Use AES instructions if available.
+ if (supports_aes()) {
+ if (FLAG_IS_DEFAULT(UseAES)) {
+ UseAES = true;
+ }
+ } else if (UseAES) {
+ if (!FLAG_IS_DEFAULT(UseAES))
+ warning("AES instructions not available on this CPU");
+ FLAG_SET_DEFAULT(UseAES, false);
+ }
+
+ // The AES intrinsic stubs require AES instruction support (of course)
+ // but also require AVX mode for misaligned SSE access
+ if (UseAES && (UseAVX > 0)) {
+ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ UseAESIntrinsics = true;
+ }
+ } else if (UseAESIntrinsics) {
+ if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
+ warning("AES intrinsics not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+
#ifdef COMPILER2
if (UseFPUForSpilling) {
if (UseSSE < 2) {
@@ -714,6 +741,9 @@ void VM_Version::get_processor_features() {
if (UseAVX > 0) {
tty->print(" UseAVX=%d",UseAVX);
}
+ if (UseAES) {
+ tty->print(" UseAES=1");
+ }
tty->cr();
tty->print("Allocation");
if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
diff --git a/src/cpu/x86/vm/vm_version_x86.hpp b/src/cpu/x86/vm/vm_version_x86.hpp
index 92cdbd3fd..12bd3b770 100644
--- a/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/src/cpu/x86/vm/vm_version_x86.hpp
@@ -78,7 +78,9 @@ public:
sse4_2 : 1,
: 2,
popcnt : 1,
- : 3,
+ : 1,
+ aes : 1,
+ : 1,
osxsave : 1,
avx : 1,
: 3;
@@ -244,7 +246,8 @@ protected:
CPU_TSC = (1 << 15),
CPU_TSCINV = (1 << 16),
CPU_AVX = (1 << 17),
- CPU_AVX2 = (1 << 18)
+ CPU_AVX2 = (1 << 18),
+ CPU_AES = (1 << 19)
} cpuFeatureFlags;
enum {
@@ -420,6 +423,8 @@ protected:
result |= CPU_TSC;
if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
result |= CPU_TSCINV;
+ if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
+ result |= CPU_AES;
// AMD features.
if (is_amd()) {
@@ -544,6 +549,7 @@ public:
static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; }
+ static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
// Intel features
static bool is_intel_family_core() { return is_intel() &&
diff --git a/src/cpu/x86/vm/x86.ad b/src/cpu/x86/vm/x86.ad
index a2cf6f794..c49d0e6c3 100644
--- a/src/cpu/x86/vm/x86.ad
+++ b/src/cpu/x86/vm/x86.ad
@@ -4102,9 +4102,158 @@ instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
// ----------------------- LogicalRightShift -----------------------------------
-// Shorts/Chars vector logical right shift produces incorrect Java result
+// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
-// sign extension before a shift.
+// sign extension before a shift. But char vectors are fine since chars are
+// unsigned values.
+
+instruct vsrl2S(vecS dst, vecS shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_imm(vecS dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S(vecD dst, vecS shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_imm(vecD dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S(vecX dst, vecS shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_imm(vecX dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
diff --git a/src/os/bsd/vm/perfMemory_bsd.cpp b/src/os/bsd/vm/perfMemory_bsd.cpp
index 123e6e289..cb9dada90 100644
--- a/src/os/bsd/vm/perfMemory_bsd.cpp
+++ b/src/os/bsd/vm/perfMemory_bsd.cpp
@@ -30,6 +30,7 @@
#include "os_bsd.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/perfMemory.hpp"
+#include "services/memTracker.hpp"
#include "utilities/exceptions.hpp"
// put OS-includes here
@@ -753,6 +754,10 @@ static char* mmap_create_shared(size_t size) {
// clear the shared memory region
(void)::memset((void*) mapAddress, 0, size);
+ // this mapping does not go through the os api, so the reservation has to be recorded here
+ MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+ MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
return mapAddress;
}
@@ -912,6 +917,10 @@ static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemor
"Could not map PerfMemory");
}
+ // this mapping does not go through the os api, so the reservation has to be recorded here
+ MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+ MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
*addr = mapAddress;
*sizep = size;
diff --git a/src/os/linux/vm/perfMemory_linux.cpp b/src/os/linux/vm/perfMemory_linux.cpp
index 2adae8d18..b54c5db14 100644
--- a/src/os/linux/vm/perfMemory_linux.cpp
+++ b/src/os/linux/vm/perfMemory_linux.cpp
@@ -30,6 +30,7 @@
#include "os_linux.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/perfMemory.hpp"
+#include "services/memTracker.hpp"
#include "utilities/exceptions.hpp"
// put OS-includes here
@@ -753,6 +754,10 @@ static char* mmap_create_shared(size_t size) {
// clear the shared memory region
(void)::memset((void*) mapAddress, 0, size);
+ // this mapping does not go through the os api, so the reservation has to be recorded here
+ MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+ MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
return mapAddress;
}
@@ -912,6 +917,10 @@ static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemor
"Could not map PerfMemory");
}
+ // this mapping does not go through the os api, so the reservation has to be recorded here
+ MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+ MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
*addr = mapAddress;
*sizep = size;
diff --git a/src/os/solaris/vm/os_solaris.cpp b/src/os/solaris/vm/os_solaris.cpp
index 0483dcbfb..e3415b8d8 100644
--- a/src/os/solaris/vm/os_solaris.cpp
+++ b/src/os/solaris/vm/os_solaris.cpp
@@ -55,6 +55,7 @@
#include "runtime/threadCritical.hpp"
#include "runtime/timer.hpp"
#include "services/attachListener.hpp"
+#include "services/memTracker.hpp"
#include "services/runtimeService.hpp"
#include "thread_solaris.inline.hpp"
#include "utilities/decoder.hpp"
@@ -3072,11 +3073,12 @@ char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
// Since snv_84, Solaris attempts to honor the address hint - see 5003415.
// Give it a try, if the kernel honors the hint we can return immediately.
char* addr = Solaris::anon_mmap(requested_addr, bytes, 0, false);
+
volatile int err = errno;
if (addr == requested_addr) {
return addr;
} else if (addr != NULL) {
- unmap_memory(addr, bytes);
+ pd_unmap_memory(addr, bytes);
}
if (PrintMiscellaneous && Verbose) {
diff --git a/src/os/solaris/vm/perfMemory_solaris.cpp b/src/os/solaris/vm/perfMemory_solaris.cpp
index c24789cbb..ebbc00b30 100644
--- a/src/os/solaris/vm/perfMemory_solaris.cpp
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp
@@ -30,6 +30,7 @@
#include "os_solaris.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/perfMemory.hpp"
+#include "services/memTracker.hpp"
#include "utilities/exceptions.hpp"
// put OS-includes here
@@ -768,6 +769,10 @@ static char* mmap_create_shared(size_t size) {
// clear the shared memory region
(void)::memset((void*) mapAddress, 0, size);
+ // this mapping does not go through the os api, so the reservation has to be recorded here
+ MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+ MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
return mapAddress;
}
@@ -927,6 +932,10 @@ static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemor
"Could not map PerfMemory");
}
+ // this mapping does not go through the os api, so the reservation has to be recorded here
+ MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+ MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
*addr = mapAddress;
*sizep = size;
diff --git a/src/os/windows/vm/perfMemory_windows.cpp b/src/os/windows/vm/perfMemory_windows.cpp
index 2c75539d8..061c9d84a 100644
--- a/src/os/windows/vm/perfMemory_windows.cpp
+++ b/src/os/windows/vm/perfMemory_windows.cpp
@@ -30,6 +30,7 @@
#include "os_windows.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/perfMemory.hpp"
+#include "services/memTracker.hpp"
#include "utilities/exceptions.hpp"
#include <windows.h>
@@ -1496,6 +1497,10 @@ static char* mapping_create_shared(size_t size) {
// clear the shared memory region
(void)memset(mapAddress, '\0', size);
+ // this mapping does not go through the os api, so the reservation has to be recorded here
+ MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+ MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
return (char*) mapAddress;
}
@@ -1672,6 +1677,11 @@ static void open_file_mapping(const char* user, int vmid,
"Could not map PerfMemory");
}
+ // this mapping does not go through the os api, so the reservation has to be recorded here
+ MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
+ MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
+
+
*addrp = (char*)mapAddress;
*sizep = size;
@@ -1824,6 +1834,8 @@ void PerfMemory::detach(char* addr, size_t bytes, TRAPS) {
}
remove_file_mapping(addr);
+ // this unmapping does not go through the os api, so the release has to be recorded here
+ MemTracker::record_virtual_memory_release((address)addr, bytes);
}
char* PerfMemory::backing_store_filename() {
diff --git a/src/share/vm/c1/c1_GraphBuilder.cpp b/src/share/vm/c1/c1_GraphBuilder.cpp
index 1bbdc5afb..941dd120a 100644
--- a/src/share/vm/c1/c1_GraphBuilder.cpp
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp
@@ -1844,17 +1844,12 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
code == Bytecodes::_invokevirtual && target->is_final_method() ||
code == Bytecodes::_invokedynamic) {
ciMethod* inline_target = (cha_monomorphic_target != NULL) ? cha_monomorphic_target : target;
- bool success = false;
- if (target->is_method_handle_intrinsic()) {
- // method handle invokes
- success = try_method_handle_inline(target);
- } else {
- // static binding => check if callee is ok
- success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
- }
- CHECK_BAILOUT();
+ // static binding => check if callee is ok
+ bool success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
+ CHECK_BAILOUT();
clear_inline_bailout();
+
if (success) {
// Register dependence if JVMTI has either breakpoint
// setting or hotswapping of methods capabilities since they may
@@ -3201,6 +3196,11 @@ bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known, Bytecodes::Co
return false;
}
+ // method handle invokes
+ if (callee->is_method_handle_intrinsic()) {
+ return try_method_handle_inline(callee);
+ }
+
// handle intrinsics
if (callee->intrinsic_id() != vmIntrinsics::_none) {
if (try_inline_intrinsics(callee)) {
@@ -3885,10 +3885,14 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) {
ValueType* type = state()->stack_at(args_base)->type();
if (type->is_constant()) {
ciMethod* target = type->as_ObjectType()->constant_value()->as_method_handle()->get_vmtarget();
- guarantee(!target->is_method_handle_intrinsic(), "should not happen"); // XXX remove
- Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
- if (try_inline(target, /*holder_known*/ true, bc)) {
- return true;
+ // We don't do CHA here so only inline static and statically bindable methods.
+ if (target->is_static() || target->can_be_statically_bound()) {
+ Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+ if (try_inline(target, /*holder_known*/ true, bc)) {
+ return true;
+ }
+ } else {
+ print_inlining(target, "not static or statically bindable", /*success*/ false);
}
} else {
print_inlining(callee, "receiver not constant", /*success*/ false);
@@ -3941,9 +3945,14 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) {
}
j += t->size(); // long and double take two slots
}
- Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
- if (try_inline(target, /*holder_known*/ true, bc)) {
- return true;
+ // We don't do CHA here so only inline static and statically bindable methods.
+ if (target->is_static() || target->can_be_statically_bound()) {
+ Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+ if (try_inline(target, /*holder_known*/ true, bc)) {
+ return true;
+ }
+ } else {
+ print_inlining(target, "not static or statically bindable", /*success*/ false);
}
}
} else {
diff --git a/src/share/vm/classfile/vmSymbols.hpp b/src/share/vm/classfile/vmSymbols.hpp
index 06fdb35be..2febc7b56 100644
--- a/src/share/vm/classfile/vmSymbols.hpp
+++ b/src/share/vm/classfile/vmSymbols.hpp
@@ -110,6 +110,7 @@
template(sun_jkernel_DownloadManager, "sun/jkernel/DownloadManager") \
template(getBootClassPathEntryForClass_name, "getBootClassPathEntryForClass") \
template(sun_misc_PostVMInitHook, "sun/misc/PostVMInitHook") \
+ template(sun_misc_Launcher_ExtClassLoader, "sun/misc/Launcher$ExtClassLoader") \
\
/* Java runtime version access */ \
template(sun_misc_Version, "sun/misc/Version") \
@@ -723,6 +724,21 @@
/* java/lang/ref/Reference */ \
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
\
+ /* support for com.sun.crypto.provider.AESCrypt and some of its callers */ \
+ do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \
+ do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
+ do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
+ do_name( encryptBlock_name, "encryptBlock") \
+ do_name( decryptBlock_name, "decryptBlock") \
+ do_signature(byteArray_int_byteArray_int_signature, "([BI[BI)V") \
+ \
+ do_class(com_sun_crypto_provider_cipherBlockChaining, "com/sun/crypto/provider/CipherBlockChaining") \
+ do_intrinsic(_cipherBlockChaining_encryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, encrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
+ do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
+ do_name( encrypt_name, "encrypt") \
+ do_name( decrypt_name, "decrypt") \
+ do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)V") \
+ \
/* support for sun.misc.Unsafe */ \
do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \
\
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp
new file mode 100644
index 000000000..01e0e8745
--- /dev/null
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
+#include "memory/freeBlockDictionary.hpp"
+#include "memory/sharedHeap.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/mutex.hpp"
+#include "runtime/vmThread.hpp"
+
+template <>
+void AdaptiveFreeList<FreeChunk>::print_on(outputStream* st, const char* c) const {
+ if (c != NULL) {
+ st->print("%16s", c);
+ } else {
+ st->print(SIZE_FORMAT_W(16), size());
+ }
+ st->print("\t"
+ SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t"
+ SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n",
+ bfr_surp(), surplus(), desired(), prev_sweep(), before_sweep(),
+ count(), coal_births(), coal_deaths(), split_births(), split_deaths());
+}
+
+template <class Chunk>
+AdaptiveFreeList<Chunk>::AdaptiveFreeList() : FreeList<Chunk>(), _hint(0) {
+ init_statistics();
+}
+
+template <class Chunk>
+AdaptiveFreeList<Chunk>::AdaptiveFreeList(Chunk* fc) : FreeList<Chunk>(fc), _hint(0) {
+ init_statistics();
+#ifndef PRODUCT
+ _allocation_stats.set_returned_bytes(size() * HeapWordSize);
+#endif
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::initialize() {
+ FreeList<Chunk>::initialize();
+ set_hint(0);
+ init_statistics(true /* split_birth */);
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::reset(size_t hint) {
+ FreeList<Chunk>::reset();
+ set_hint(hint);
+}
+
+#ifndef PRODUCT
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::assert_proper_lock_protection_work() const {
+ assert(protecting_lock() != NULL, "Don't call this directly");
+ assert(ParallelGCThreads > 0, "Don't call this directly");
+ Thread* thr = Thread::current();
+ if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
+ // NOTE(review): nothing is asserted here for VM / concurrent GC threads; confirm that is intended
+ } else if (thr->is_GC_task_thread()) {
+ assert(protecting_lock()->owned_by_self(), "FreeList RACE DETECTED");
+ } else if (thr->is_Java_thread()) {
+ assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
+ } else {
+ ShouldNotReachHere(); // unaccounted thread type?
+ }
+}
+#endif
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::init_statistics(bool split_birth) {
+ _allocation_stats.initialize(split_birth);
+}
+
+template <class Chunk>
+size_t AdaptiveFreeList<Chunk>::get_better_size() {
+
+ // A candidate chunk has been found. If this list is already under
+ // populated (surplus() <= 0) and there is a hint, return the hint().
+ // Else return the size of this chunk.
+ if (surplus() <= 0) {
+ if (hint() != 0) {
+ return hint();
+ } else {
+ return size();
+ }
+ } else {
+ // This list has a surplus so use it.
+ return size();
+ }
+}
+
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_head(Chunk* chunk) {
+ assert_proper_lock_protection();
+ return_chunk_at_head(chunk, true);
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_head(Chunk* chunk, bool record_return) {
+ FreeList<Chunk>::return_chunk_at_head(chunk, record_return);
+#ifdef ASSERT
+ if (record_return) {
+ increment_returned_bytes_by(size()*HeapWordSize);
+ }
+#endif
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_tail(Chunk* chunk) {
+ return_chunk_at_tail(chunk, true);
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_tail(Chunk* chunk, bool record_return) {
+ FreeList<Chunk>::return_chunk_at_tail(chunk, record_return);
+#ifdef ASSERT
+ if (record_return) {
+ increment_returned_bytes_by(size()*HeapWordSize);
+ }
+#endif
+}
+
+#ifndef PRODUCT
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::verify_stats() const {
+ // Non-product check of the "Conservation Principle" for this list's
+ // allocation statistics: production (prev_sweep + split_births +
+ // coal_births) must cover depletion plus current stock. The +1 on the
+ // left-hand side allows some "looseness": this is usually called when
+ // adding a block, before the stats are updated; they cannot be updated
+ // beforehand because, e.g. for the large-block BT dictionary, this might
+ // be the first block and the stats are kept in the block itself.
+ assert((_allocation_stats.prev_sweep() + _allocation_stats.split_births()
+ + _allocation_stats.coal_births() + 1) // Total Production Stock + 1
+ >= (_allocation_stats.split_deaths() + _allocation_stats.coal_deaths()
+ + (ssize_t)count()), // Total Current Stock + depletion
+ err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT
+ " violates Conservation Principle: "
+ "prev_sweep(" SIZE_FORMAT ")"
+ " + split_births(" SIZE_FORMAT ")"
+ " + coal_births(" SIZE_FORMAT ") + 1 >= "
+ " split_deaths(" SIZE_FORMAT ")"
+ " + coal_deaths(" SIZE_FORMAT ")"
+ " + count(" SSIZE_FORMAT ")",
+ this, size(), _allocation_stats.prev_sweep(), _allocation_stats.split_births(),
+ _allocation_stats.coal_births(), _allocation_stats.split_deaths(), // args follow the label order above
+ _allocation_stats.coal_deaths(), count()));
+}
+#endif
+
+// Needs to be after the definitions have been seen.
+template class AdaptiveFreeList<FreeChunk>;
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp
new file mode 100644
index 000000000..8b56bb11d
--- /dev/null
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
+#define SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
+
+#include "memory/freeList.hpp"
+#include "gc_implementation/shared/allocationStats.hpp"
+
+class CompactibleFreeListSpace;
+
+// A class for maintaining a free list of Chunk's. The FreeList
+// maintains the structure of the list (head, tail, etc.) plus
+// statistics for allocations from the list. The links between items
+// are not part of FreeList. The statistics are
+// used to make decisions about coalescing Chunk's when they
+// are swept during collection.
+//
+// See the corresponding .cpp file for a description of the specifics
+// for that implementation.
+
+class Mutex;
+
+// FreeList<Chunk> extended with a size hint and an AllocationStats
+// record. Most members below are thin forwarders to _allocation_stats;
+// every mutating forwarder (other than the returned-bytes debugging
+// helpers) first calls assert_proper_lock_protection().
+template <class Chunk>
+class AdaptiveFreeList : public FreeList<Chunk> {
+  friend class CompactibleFreeListSpace;
+  friend class VMStructs;
+  // friend class PrintTreeCensusClosure<Chunk, FreeList_t>;
+
+  // Next larger size list with a positive surplus.
+  size_t _hint;
+
+  // Allocation-related statistics.
+  AllocationStats _allocation_stats;
+
+ public:
+
+  AdaptiveFreeList();
+  AdaptiveFreeList(Chunk* fc);
+
+  // Members of the dependent base made visible for unqualified use,
+  // interleaved with the chunk-return operations declared by this class.
+  using FreeList<Chunk>::assert_proper_lock_protection;
+#ifdef ASSERT
+  using FreeList<Chunk>::protecting_lock;
+#endif
+  using FreeList<Chunk>::count;
+  using FreeList<Chunk>::size;
+  using FreeList<Chunk>::verify_chunk_in_free_list;
+  using FreeList<Chunk>::getFirstNChunksFromList;
+  using FreeList<Chunk>::print_on;
+  void return_chunk_at_head(Chunk* fc, bool record_return);
+  void return_chunk_at_head(Chunk* fc);
+  void return_chunk_at_tail(Chunk* fc, bool record_return);
+  void return_chunk_at_tail(Chunk* fc);
+  using FreeList<Chunk>::return_chunk_at_tail;
+  using FreeList<Chunk>::remove_chunk;
+  using FreeList<Chunk>::prepend;
+  using FreeList<Chunk>::print_labels_on;
+  using FreeList<Chunk>::get_chunk_at_head;
+
+  // Initialize.
+  void initialize();
+
+  // Reset the head, tail, hint, and count of a free list.
+  void reset(size_t hint);
+
+  void assert_proper_lock_protection_work() const PRODUCT_RETURN;
+
+  void print_on(outputStream* st, const char* c = NULL) const;
+
+  // Hint accessors. A non-zero hint must be strictly larger than this
+  // list's own chunk size.
+  size_t hint() const { return _hint; }
+  void set_hint(size_t v) {
+    assert_proper_lock_protection();
+    assert(v == 0 || size() < v, "Bad hint");
+    _hint = v;
+  }
+
+  size_t get_better_size();
+
+  // Accessors for statistics
+  void init_statistics(bool split_birth = false);
+
+  AllocationStats* allocation_stats() {
+    assert_proper_lock_protection();
+    return &_allocation_stats;
+  }
+
+  // desired
+  ssize_t desired() const { return _allocation_stats.desired(); }
+  void set_desired(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_desired(v);
+  }
+  // Forwards to AllocationStats::compute_desired, supplying the current
+  // count() of this list as the first argument.
+  void compute_desired(float inter_sweep_current,
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
+    assert_proper_lock_protection();
+    _allocation_stats.compute_desired(count(),
+                                      inter_sweep_current,
+                                      inter_sweep_estimate,
+                                      intra_sweep_estimate);
+  }
+
+  // coal_desired
+  ssize_t coal_desired() const { return _allocation_stats.coal_desired(); }
+  void set_coal_desired(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_desired(v);
+  }
+
+  // surplus
+  ssize_t surplus() const { return _allocation_stats.surplus(); }
+  void set_surplus(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_surplus(v);
+  }
+  void increment_surplus() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_surplus();
+  }
+  void decrement_surplus() {
+    assert_proper_lock_protection();
+    _allocation_stats.decrement_surplus();
+  }
+
+  // bfr_surp
+  ssize_t bfr_surp() const { return _allocation_stats.bfr_surp(); }
+  void set_bfr_surp(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_bfr_surp(v);
+  }
+
+  // prev_sweep
+  ssize_t prev_sweep() const { return _allocation_stats.prev_sweep(); }
+  void set_prev_sweep(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_prev_sweep(v);
+  }
+
+  // before_sweep
+  ssize_t before_sweep() const { return _allocation_stats.before_sweep(); }
+  void set_before_sweep(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_before_sweep(v);
+  }
+
+  // coal_births
+  ssize_t coal_births() const { return _allocation_stats.coal_births(); }
+  void set_coal_births(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_births(v);
+  }
+  void increment_coal_births() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_coal_births();
+  }
+
+  // coal_deaths
+  ssize_t coal_deaths() const { return _allocation_stats.coal_deaths(); }
+  void set_coal_deaths(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_deaths(v);
+  }
+  void increment_coal_deaths() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_coal_deaths();
+  }
+
+  // split_births
+  ssize_t split_births() const { return _allocation_stats.split_births(); }
+  void set_split_births(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_split_births(v);
+  }
+  void increment_split_births() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_split_births();
+  }
+
+  // split_deaths
+  ssize_t split_deaths() const { return _allocation_stats.split_deaths(); }
+  void set_split_deaths(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_split_deaths(v);
+  }
+  void increment_split_deaths() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_split_deaths();
+  }
+
+#ifndef PRODUCT
+  // For debugging. The "_returned_bytes" in all the lists are summed
+  // and compared with the total number of bytes swept during a
+  // collection.
+  size_t returned_bytes() const {
+    return _allocation_stats.returned_bytes();
+  }
+  void set_returned_bytes(size_t v) {
+    _allocation_stats.set_returned_bytes(v);
+  }
+  // Adds "v" to the current returned-bytes total.
+  void increment_returned_bytes_by(size_t v) {
+    set_returned_bytes(returned_bytes() + v);
+  }
+  // Stats verification
+  void verify_stats() const;
+#endif // NOT PRODUCT
+};
+
+#endif // SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
index eac32b1ea..2f43c9876 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
@@ -91,7 +91,7 @@ CompactibleFreeListSpace::CompactibleFreeListSpace(BlockOffsetSharedArray* bs,
_collector(NULL)
{
assert(sizeof(FreeChunk) / BytesPerWord <= MinChunkSize,
- "FreeChunk is larger than expected");
+ "FreeChunk is larger than expected");
_bt.set_space(this);
initialize(mr, SpaceDecorator::Clear, SpaceDecorator::Mangle);
// We have all of "mr", all of which we place in the dictionary
@@ -101,14 +101,14 @@ CompactibleFreeListSpace::CompactibleFreeListSpace(BlockOffsetSharedArray* bs,
// implementation, namely, the simple binary tree (splaying
// temporarily disabled).
switch (dictionaryChoice) {
+ case FreeBlockDictionary<FreeChunk>::dictionaryBinaryTree:
+ _dictionary = new BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>(mr);
+ break;
case FreeBlockDictionary<FreeChunk>::dictionarySplayTree:
case FreeBlockDictionary<FreeChunk>::dictionarySkipList:
default:
warning("dictionaryChoice: selected option not understood; using"
" default BinaryTreeDictionary implementation instead.");
- case FreeBlockDictionary<FreeChunk>::dictionaryBinaryTree:
- _dictionary = new BinaryTreeDictionary<FreeChunk>(mr, use_adaptive_freelists);
- break;
}
assert(_dictionary != NULL, "CMS dictionary initialization");
// The indexed free lists are initially all empty and are lazily
@@ -453,7 +453,7 @@ const {
reportIndexedFreeListStatistics();
gclog_or_tty->print_cr("Layout of Indexed Freelists");
gclog_or_tty->print_cr("---------------------------");
- FreeList<FreeChunk>::print_labels_on(st, "size");
+ AdaptiveFreeList<FreeChunk>::print_labels_on(st, "size");
for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
_indexedFreeList[i].print_on(gclog_or_tty);
for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
@@ -1319,7 +1319,7 @@ FreeChunk* CompactibleFreeListSpace::getChunkFromGreater(size_t numWords) {
size_t currSize = numWords + MinChunkSize;
assert(currSize % MinObjAlignment == 0, "currSize should be aligned");
for (i = currSize; i < IndexSetSize; i += IndexSetStride) {
- FreeList<FreeChunk>* fl = &_indexedFreeList[i];
+ AdaptiveFreeList<FreeChunk>* fl = &_indexedFreeList[i];
if (fl->head()) {
ret = getFromListGreater(fl, numWords);
assert(ret == NULL || ret->is_free(), "Should be returning a free chunk");
@@ -1702,7 +1702,9 @@ CompactibleFreeListSpace::returnChunkToDictionary(FreeChunk* chunk) {
_dictionary->return_chunk(chunk);
#ifndef PRODUCT
if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
- TreeChunk<FreeChunk>::as_TreeChunk(chunk)->list()->verify_stats();
+ TreeChunk<FreeChunk, AdaptiveFreeList>* tc = TreeChunk<FreeChunk, AdaptiveFreeList>::as_TreeChunk(chunk);
+ TreeList<FreeChunk, AdaptiveFreeList>* tl = tc->list();
+ tl->verify_stats();
}
#endif // PRODUCT
}
@@ -1745,7 +1747,7 @@ CompactibleFreeListSpace::addChunkToFreeListsAtEndRecordingStats(
{
MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
ec = dictionary()->find_largest_dict(); // get largest block
- if (ec != NULL && ec->end() == chunk) {
+ if (ec != NULL && ec->end() == (uintptr_t*) chunk) {
// It's a coterminal block - we can coalesce.
size_t old_size = ec->size();
coalDeath(old_size);
@@ -1850,11 +1852,11 @@ FreeChunk* CompactibleFreeListSpace::bestFitSmall(size_t numWords) {
the excess is >= MIN_CHUNK. */
size_t start = align_object_size(numWords + MinChunkSize);
if (start < IndexSetSize) {
- FreeList<FreeChunk>* it = _indexedFreeList;
+ AdaptiveFreeList<FreeChunk>* it = _indexedFreeList;
size_t hint = _indexedFreeList[start].hint();
while (hint < IndexSetSize) {
assert(hint % MinObjAlignment == 0, "hint should be aligned");
- FreeList<FreeChunk> *fl = &_indexedFreeList[hint];
+ AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[hint];
if (fl->surplus() > 0 && fl->head() != NULL) {
// Found a list with surplus, reset original hint
// and split out a free chunk which is returned.
@@ -1873,7 +1875,7 @@ FreeChunk* CompactibleFreeListSpace::bestFitSmall(size_t numWords) {
}
/* Requires fl->size >= numWords + MinChunkSize */
-FreeChunk* CompactibleFreeListSpace::getFromListGreater(FreeList<FreeChunk>* fl,
+FreeChunk* CompactibleFreeListSpace::getFromListGreater(AdaptiveFreeList<FreeChunk>* fl,
size_t numWords) {
FreeChunk *curr = fl->head();
size_t oldNumWords = curr->size();
@@ -2155,7 +2157,7 @@ void CompactibleFreeListSpace::beginSweepFLCensus(
assert_locked();
size_t i;
for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
- FreeList<FreeChunk>* fl = &_indexedFreeList[i];
+ AdaptiveFreeList<FreeChunk>* fl = &_indexedFreeList[i];
if (PrintFLSStatistics > 1) {
gclog_or_tty->print("size[%d] : ", i);
}
@@ -2174,7 +2176,7 @@ void CompactibleFreeListSpace::setFLSurplus() {
assert_locked();
size_t i;
for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
- FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+ AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
fl->set_surplus(fl->count() -
(ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent));
}
@@ -2185,7 +2187,7 @@ void CompactibleFreeListSpace::setFLHints() {
size_t i;
size_t h = IndexSetSize;
for (i = IndexSetSize - 1; i != 0; i -= IndexSetStride) {
- FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+ AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
fl->set_hint(h);
if (fl->surplus() > 0) {
h = i;
@@ -2197,7 +2199,7 @@ void CompactibleFreeListSpace::clearFLCensus() {
assert_locked();
size_t i;
for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
- FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+ AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
fl->set_prev_sweep(fl->count());
fl->set_coal_births(0);
fl->set_coal_deaths(0);
@@ -2224,7 +2226,7 @@ void CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) {
bool CompactibleFreeListSpace::coalOverPopulated(size_t size) {
if (size < SmallForDictionary) {
- FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+ AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
return (fl->coal_desired() < 0) ||
((int)fl->count() > fl->coal_desired());
} else {
@@ -2234,14 +2236,14 @@ bool CompactibleFreeListSpace::coalOverPopulated(size_t size) {
void CompactibleFreeListSpace::smallCoalBirth(size_t size) {
assert(size < SmallForDictionary, "Size too large for indexed list");
- FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+ AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
fl->increment_coal_births();
fl->increment_surplus();
}
void CompactibleFreeListSpace::smallCoalDeath(size_t size) {
assert(size < SmallForDictionary, "Size too large for indexed list");
- FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+ AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
fl->increment_coal_deaths();
fl->decrement_surplus();
}
@@ -2250,7 +2252,7 @@ void CompactibleFreeListSpace::coalBirth(size_t size) {
if (size < SmallForDictionary) {
smallCoalBirth(size);
} else {
- dictionary()->dict_census_udpate(size,
+ dictionary()->dict_census_update(size,
false /* split */,
true /* birth */);
}
@@ -2260,7 +2262,7 @@ void CompactibleFreeListSpace::coalDeath(size_t size) {
if(size < SmallForDictionary) {
smallCoalDeath(size);
} else {
- dictionary()->dict_census_udpate(size,
+ dictionary()->dict_census_update(size,
false /* split */,
false /* birth */);
}
@@ -2268,14 +2270,14 @@ void CompactibleFreeListSpace::coalDeath(size_t size) {
void CompactibleFreeListSpace::smallSplitBirth(size_t size) {
assert(size < SmallForDictionary, "Size too large for indexed list");
- FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+ AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
fl->increment_split_births();
fl->increment_surplus();
}
void CompactibleFreeListSpace::smallSplitDeath(size_t size) {
assert(size < SmallForDictionary, "Size too large for indexed list");
- FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+ AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
fl->increment_split_deaths();
fl->decrement_surplus();
}
@@ -2284,7 +2286,7 @@ void CompactibleFreeListSpace::split_birth(size_t size) {
if (size < SmallForDictionary) {
smallSplitBirth(size);
} else {
- dictionary()->dict_census_udpate(size,
+ dictionary()->dict_census_update(size,
true /* split */,
true /* birth */);
}
@@ -2294,7 +2296,7 @@ void CompactibleFreeListSpace::splitDeath(size_t size) {
if (size < SmallForDictionary) {
smallSplitDeath(size);
} else {
- dictionary()->dict_census_udpate(size,
+ dictionary()->dict_census_update(size,
true /* split */,
false /* birth */);
}
@@ -2517,10 +2519,10 @@ void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const {
#ifndef PRODUCT
void CompactibleFreeListSpace::check_free_list_consistency() const {
- assert(_dictionary->min_size() <= IndexSetSize,
+ assert((TreeChunk<FreeChunk, AdaptiveFreeList>::min_size() <= IndexSetSize),
"Some sizes can't be allocated without recourse to"
" linear allocation buffers");
- assert(BinaryTreeDictionary<FreeChunk>::min_tree_chunk_size*HeapWordSize == sizeof(TreeChunk<FreeChunk>),
+ assert((TreeChunk<FreeChunk, AdaptiveFreeList>::min_size()*HeapWordSize == sizeof(TreeChunk<FreeChunk, AdaptiveFreeList>)),
"else MIN_TREE_CHUNK_SIZE is wrong");
assert(IndexSetStart != 0, "IndexSetStart not initialized");
assert(IndexSetStride != 0, "IndexSetStride not initialized");
@@ -2529,15 +2531,15 @@ void CompactibleFreeListSpace::check_free_list_consistency() const {
void CompactibleFreeListSpace::printFLCensus(size_t sweep_count) const {
assert_lock_strong(&_freelistLock);
- FreeList<FreeChunk> total;
+ AdaptiveFreeList<FreeChunk> total;
gclog_or_tty->print("end sweep# " SIZE_FORMAT "\n", sweep_count);
- FreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+ AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
size_t total_free = 0;
for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
- const FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+ const AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
total_free += fl->count() * fl->size();
if (i % (40*IndexSetStride) == 0) {
- FreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+ AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
}
fl->print_on(gclog_or_tty);
total.set_bfr_surp( total.bfr_surp() + fl->bfr_surp() );
@@ -2620,7 +2622,7 @@ HeapWord* CFLS_LAB::alloc(size_t word_sz) {
res = _cfls->getChunkFromDictionaryExact(word_sz);
if (res == NULL) return NULL;
} else {
- FreeList<FreeChunk>* fl = &_indexedFreeList[word_sz];
+ AdaptiveFreeList<FreeChunk>* fl = &_indexedFreeList[word_sz];
if (fl->count() == 0) {
// Attempt to refill this local free list.
get_from_global_pool(word_sz, fl);
@@ -2640,7 +2642,7 @@ HeapWord* CFLS_LAB::alloc(size_t word_sz) {
// Get a chunk of blocks of the right size and update related
// book-keeping stats
-void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList<FreeChunk>* fl) {
+void CFLS_LAB::get_from_global_pool(size_t word_sz, AdaptiveFreeList<FreeChunk>* fl) {
// Get the #blocks we want to claim
size_t n_blks = (size_t)_blocks_to_claim[word_sz].average();
assert(n_blks > 0, "Error");
@@ -2722,7 +2724,7 @@ void CFLS_LAB::retire(int tid) {
if (num_retire > 0) {
_cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
// Reset this list.
- _indexedFreeList[i] = FreeList<FreeChunk>();
+ _indexedFreeList[i] = AdaptiveFreeList<FreeChunk>();
_indexedFreeList[i].set_size(i);
}
}
@@ -2736,7 +2738,7 @@ void CFLS_LAB::retire(int tid) {
}
}
-void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList<FreeChunk>* fl) {
+void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, AdaptiveFreeList<FreeChunk>* fl) {
assert(fl->count() == 0, "Precondition.");
assert(word_sz < CompactibleFreeListSpace::IndexSetSize,
"Precondition");
@@ -2752,12 +2754,12 @@ void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n
(cur_sz < CompactibleFreeListSpace::IndexSetSize) &&
(CMSSplitIndexedFreeListBlocks || k <= 1);
k++, cur_sz = k * word_sz) {
- FreeList<FreeChunk> fl_for_cur_sz; // Empty.
+ AdaptiveFreeList<FreeChunk> fl_for_cur_sz; // Empty.
fl_for_cur_sz.set_size(cur_sz);
{
MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
Mutex::_no_safepoint_check_flag);
- FreeList<FreeChunk>* gfl = &_indexedFreeList[cur_sz];
+ AdaptiveFreeList<FreeChunk>* gfl = &_indexedFreeList[cur_sz];
if (gfl->count() != 0) {
// nn is the number of chunks of size cur_sz that
// we'd need to split k-ways each, in order to create
@@ -2832,12 +2834,11 @@ void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n
MutexLockerEx x(parDictionaryAllocLock(),
Mutex::_no_safepoint_check_flag);
while (n > 0) {
- fc = dictionary()->get_chunk(MAX2(n * word_sz,
- _dictionary->min_size()),
+ fc = dictionary()->get_chunk(MAX2(n * word_sz, _dictionary->min_size()),
FreeBlockDictionary<FreeChunk>::atLeast);
if (fc != NULL) {
_bt.allocated((HeapWord*)fc, fc->size(), true /* reducing */); // update _unallocated_blk
- dictionary()->dict_census_udpate(fc->size(),
+ dictionary()->dict_census_update(fc->size(),
true /*split*/,
false /*birth*/);
break;
@@ -2890,7 +2891,7 @@ void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n
fc->set_size(prefix_size);
if (rem >= IndexSetSize) {
returnChunkToDictionary(rem_fc);
- dictionary()->dict_census_udpate(rem, true /*split*/, true /*birth*/);
+ dictionary()->dict_census_update(rem, true /*split*/, true /*birth*/);
rem_fc = NULL;
}
// Otherwise, return it to the small list below.
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
index 4d247356d..1b3d93ed2 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
@@ -25,6 +25,7 @@
#ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP
#define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP
+#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
#include "gc_implementation/concurrentMarkSweep/promotionInfo.hpp"
#include "memory/binaryTreeDictionary.hpp"
#include "memory/blockOffsetTable.inline.hpp"
@@ -38,6 +39,7 @@
class CompactibleFreeListSpace;
class BlkClosure;
class BlkClosureCareful;
+class FreeChunk;
class UpwardsObjectClosure;
class ObjectClosureCareful;
class Klass;
@@ -131,7 +133,7 @@ class CompactibleFreeListSpace: public CompactibleSpace {
FreeBlockDictionary<FreeChunk>::DictionaryChoice _dictionaryChoice;
FreeBlockDictionary<FreeChunk>* _dictionary; // ptr to dictionary for large size blocks
- FreeList<FreeChunk> _indexedFreeList[IndexSetSize];
+ AdaptiveFreeList<FreeChunk> _indexedFreeList[IndexSetSize];
// indexed array for small size blocks
// allocation stategy
bool _fitStrategy; // Use best fit strategy.
@@ -168,7 +170,7 @@ class CompactibleFreeListSpace: public CompactibleSpace {
// If the count of "fl" is negative, it's absolute value indicates a
// number of free chunks that had been previously "borrowed" from global
// list of size "word_sz", and must now be decremented.
- void par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList<FreeChunk>* fl);
+ void par_get_chunk_of_blocks(size_t word_sz, size_t n, AdaptiveFreeList<FreeChunk>* fl);
// Allocation helper functions
// Allocate using a strategy that takes from the indexed free lists
@@ -214,7 +216,7 @@ class CompactibleFreeListSpace: public CompactibleSpace {
// and return it. The split off remainder is returned to
// the free lists. The old name for getFromListGreater
// was lookInListGreater.
- FreeChunk* getFromListGreater(FreeList<FreeChunk>* fl, size_t numWords);
+ FreeChunk* getFromListGreater(AdaptiveFreeList<FreeChunk>* fl, size_t numWords);
// Get a chunk in the indexed free list or dictionary,
// by considering a larger chunk and splitting it.
FreeChunk* getChunkFromGreater(size_t numWords);
@@ -621,7 +623,7 @@ class CFLS_LAB : public CHeapObj<mtGC> {
CompactibleFreeListSpace* _cfls;
// Our local free lists.
- FreeList<FreeChunk> _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
+ AdaptiveFreeList<FreeChunk> _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
// Initialized from a command-line arg.
@@ -634,7 +636,7 @@ class CFLS_LAB : public CHeapObj<mtGC> {
size_t _num_blocks [CompactibleFreeListSpace::IndexSetSize];
// Internal work method
- void get_from_global_pool(size_t word_sz, FreeList<FreeChunk>* fl);
+ void get_from_global_pool(size_t word_sz, AdaptiveFreeList<FreeChunk>* fl);
public:
CFLS_LAB(CompactibleFreeListSpace* cfls);
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
index ac8b70017..475f2b8fe 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@@ -9143,7 +9143,7 @@ void ASConcurrentMarkSweepGeneration::shrink_by(size_t desired_bytes) {
size_t shrinkable_size_in_bytes = chunk_at_end->size();
size_t aligned_shrinkable_size_in_bytes =
align_size_down(shrinkable_size_in_bytes, os::vm_page_size());
- assert(unallocated_start <= chunk_at_end->end(),
+ assert(unallocated_start <= (HeapWord*) chunk_at_end->end(),
"Inconsistent chunk at end of space");
size_t bytes = MIN2(desired_bytes, aligned_shrinkable_size_in_bytes);
size_t word_size_before = heap_word_size(_virtual_space.committed_size());
@@ -9210,7 +9210,7 @@ void ASConcurrentMarkSweepGeneration::shrink_by(size_t desired_bytes) {
assert(_cmsSpace->unallocated_block() <= _cmsSpace->end(),
"Inconsistency at end of space");
- assert(chunk_at_end->end() == _cmsSpace->end(),
+ assert(chunk_at_end->end() == (uintptr_t*) _cmsSpace->end(),
"Shrinking is inconsistent");
return;
}
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp
index 21934bca8..8376e8798 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp
@@ -133,7 +133,7 @@ class FreeChunk VALUE_OBJ_CLASS_SPEC {
}
// Return the address past the end of this chunk
- HeapWord* end() const { return ((HeapWord*) this) + size(); }
+ uintptr_t* end() const { return ((uintptr_t*) this) + size(); }
// debugging
void verify() const PRODUCT_RETURN;
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp b/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp
index c393a5528..b722779b8 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp
@@ -25,6 +25,8 @@
#ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_VMSTRUCTS_CMS_HPP
#define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_VMSTRUCTS_CMS_HPP
+typedef BinaryTreeDictionary<FreeChunk, AdaptiveFreeList> AFLBinaryTreeDictionary;
+
#define VM_STRUCTS_CMS(nonstatic_field, \
volatile_nonstatic_field, \
static_field) \
@@ -38,14 +40,8 @@
nonstatic_field(CMSCollector, _markBitMap, CMSBitMap) \
nonstatic_field(ConcurrentMarkSweepGeneration, _cmsSpace, CompactibleFreeListSpace*) \
static_field(ConcurrentMarkSweepThread, _collector, CMSCollector*) \
- volatile_nonstatic_field(FreeChunk, _size, size_t) \
- nonstatic_field(FreeChunk, _next, FreeChunk*) \
- nonstatic_field(FreeChunk, _prev, FreeChunk*) \
nonstatic_field(LinearAllocBlock, _word_size, size_t) \
- nonstatic_field(FreeList<FreeChunk>, _size, size_t) \
- nonstatic_field(FreeList<FreeChunk>, _count, ssize_t) \
- nonstatic_field(BinaryTreeDictionary<FreeChunk>,_total_size, size_t) \
- nonstatic_field(CompactibleFreeListSpace, _dictionary, FreeBlockDictionary<FreeChunk>*) \
+ nonstatic_field(AFLBinaryTreeDictionary, _total_size, size_t) \
nonstatic_field(CompactibleFreeListSpace, _indexedFreeList[0], FreeList<FreeChunk>) \
nonstatic_field(CompactibleFreeListSpace, _smallLinearAllocBlock, LinearAllocBlock)
@@ -60,19 +56,17 @@
declare_toplevel_type(CMSCollector) \
declare_toplevel_type(CMSBitMap) \
declare_toplevel_type(FreeChunk) \
+ declare_toplevel_type(Metablock) \
declare_toplevel_type(ConcurrentMarkSweepThread*) \
declare_toplevel_type(ConcurrentMarkSweepGeneration*) \
declare_toplevel_type(SurrogateLockerThread*) \
declare_toplevel_type(CompactibleFreeListSpace*) \
declare_toplevel_type(CMSCollector*) \
- declare_toplevel_type(FreeChunk*) \
- declare_toplevel_type(BinaryTreeDictionary<FreeChunk>*) \
- declare_toplevel_type(FreeBlockDictionary<FreeChunk>*) \
- declare_toplevel_type(FreeList<FreeChunk>*) \
- declare_toplevel_type(FreeList<FreeChunk>) \
+ declare_toplevel_type(AFLBinaryTreeDictionary*) \
declare_toplevel_type(LinearAllocBlock) \
declare_toplevel_type(FreeBlockDictionary<FreeChunk>) \
- declare_type(BinaryTreeDictionary<FreeChunk>, FreeBlockDictionary<FreeChunk>)
+ declare_type(AFLBinaryTreeDictionary, FreeBlockDictionary<FreeChunk>) \
+ declare_type(AFLBinaryTreeDictionary, FreeBlockDictionary<FreeChunk>) \
#define VM_INT_CONSTANTS_CMS(declare_constant) \
declare_constant(Generation::ConcurrentMarkSweep) \
diff --git a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp
index 3966877e4..285ef97e3 100644
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp
@@ -191,7 +191,7 @@ class VM_GenCollectFull: public VM_GC_Operation {
class VM_CollectForMetadataAllocation: public VM_GC_Operation {
private:
MetaWord* _result;
- size_t _size; // size of object to be allocated
+ size_t _size; // size of object to be allocated
Metaspace::MetadataType _mdtype;
ClassLoaderData* _loader_data;
public:
diff --git a/src/share/vm/memory/allocation.cpp b/src/share/vm/memory/allocation.cpp
index 56c7b63fb..92b335b10 100644
--- a/src/share/vm/memory/allocation.cpp
+++ b/src/share/vm/memory/allocation.cpp
@@ -433,19 +433,18 @@ Arena::Arena() {
NOT_PRODUCT(Atomic::inc(&_instance_count);)
}
-Arena::Arena(Arena *a) : _chunk(a->_chunk), _hwm(a->_hwm), _max(a->_max), _first(a->_first) {
- set_size_in_bytes(a->size_in_bytes());
- NOT_PRODUCT(Atomic::inc(&_instance_count);)
-}
-
-
Arena *Arena::move_contents(Arena *copy) {
copy->destruct_contents();
copy->_chunk = _chunk;
copy->_hwm = _hwm;
copy->_max = _max;
copy->_first = _first;
- copy->set_size_in_bytes(size_in_bytes());
+
+ // Work around a rare race condition in which native memory tracking
+ // could double count the arena size.
+ size_t size = size_in_bytes();
+ set_size_in_bytes(0);
+ copy->set_size_in_bytes(size);
// Destroy original arena
reset();
return copy; // Return Arena with contents
@@ -497,6 +496,9 @@ void Arena::destruct_contents() {
char* end = _first->next() ? _first->top() : _hwm;
free_malloced_objects(_first, _first->bottom(), end, _hwm);
}
+ // reset size before chop to avoid a rare race condition
+ // that can have total arena memory exceed total chunk memory
+ set_size_in_bytes(0);
_first->chop();
reset();
}
diff --git a/src/share/vm/memory/allocation.hpp b/src/share/vm/memory/allocation.hpp
index 30662b8e5..892e50ddc 100644
--- a/src/share/vm/memory/allocation.hpp
+++ b/src/share/vm/memory/allocation.hpp
@@ -144,8 +144,10 @@ enum MemoryType {
mtNMT = 0x0A00, // memory used by native memory tracking
mtChunk = 0x0B00, // chunk that holds content of arenas
mtJavaHeap = 0x0C00, // Java heap
- mtDontTrack = 0x0D00, // memory we donot or cannot track
- mt_number_of_types = 0x000C, // number of memory types
+ mtClassShared = 0x0D00, // class data sharing
+ mt_number_of_types = 0x000D, // number of memory types (mtDontTrack
+ // is not included as a valid type)
+ mtDontTrack = 0x0E00, // memory we do not or cannot track
mt_masks = 0x7F00,
// object type mask
@@ -342,7 +344,6 @@ protected:
public:
Arena();
Arena(size_t init_size);
- Arena(Arena *old);
~Arena();
void destruct_contents();
char* hwm() const { return _hwm; }
diff --git a/src/share/vm/memory/binaryTreeDictionary.cpp b/src/share/vm/memory/binaryTreeDictionary.cpp
index 08a9b0334..f79d149f1 100644
--- a/src/share/vm/memory/binaryTreeDictionary.cpp
+++ b/src/share/vm/memory/binaryTreeDictionary.cpp
@@ -25,9 +25,15 @@
#include "precompiled.hpp"
#include "gc_implementation/shared/allocationStats.hpp"
#include "memory/binaryTreeDictionary.hpp"
+#include "memory/freeList.hpp"
+#include "memory/freeBlockDictionary.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
#include "runtime/globals.hpp"
#include "utilities/ostream.hpp"
#ifndef SERIALGC
+#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
#include "gc_implementation/shared/spaceDecorator.hpp"
#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
#endif // SERIALGC
@@ -37,15 +43,18 @@
// This is currently used in the Concurrent Mark&Sweep implementation.
////////////////////////////////////////////////////////////////////////////////
-template <class Chunk>
-TreeChunk<Chunk>* TreeChunk<Chunk>::as_TreeChunk(Chunk* fc) {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t TreeChunk<Chunk_t, FreeList_t>::_min_tree_chunk_size = sizeof(TreeChunk<Chunk_t, FreeList_t>)/HeapWordSize;
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(Chunk_t* fc) {
// Do some assertion checking here.
- return (TreeChunk<Chunk>*) fc;
+ return (TreeChunk<Chunk_t, FreeList_t>*) fc;
}
-template <class Chunk>
-void TreeChunk<Chunk>::verify_tree_chunk_list() const {
- TreeChunk<Chunk>* nextTC = (TreeChunk<Chunk>*)next();
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeChunk<Chunk_t, FreeList_t>::verify_tree_chunk_list() const {
+ TreeChunk<Chunk_t, FreeList_t>* nextTC = (TreeChunk<Chunk_t, FreeList_t>*)next();
if (prev() != NULL) { // interior list node shouldn'r have tree fields
guarantee(embedded_list()->parent() == NULL && embedded_list()->left() == NULL &&
embedded_list()->right() == NULL, "should be clear");
@@ -57,53 +66,113 @@ void TreeChunk<Chunk>::verify_tree_chunk_list() const {
}
}
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>::TreeList() {}
-template <class Chunk>
-TreeList<Chunk>* TreeList<Chunk>::as_TreeList(TreeChunk<Chunk>* tc) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+TreeList<Chunk_t, FreeList_t>::as_TreeList(TreeChunk<Chunk_t,FreeList_t>* tc) {
// This first free chunk in the list will be the tree list.
- assert(tc->size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "Chunk is too small for a TreeChunk");
- TreeList<Chunk>* tl = tc->embedded_list();
+ assert((tc->size() >= (TreeChunk<Chunk_t, FreeList_t>::min_size())),
+ "Chunk is too small for a TreeChunk");
+ TreeList<Chunk_t, FreeList_t>* tl = tc->embedded_list();
+ tl->initialize();
tc->set_list(tl);
-#ifdef ASSERT
- tl->set_protecting_lock(NULL);
-#endif
- tl->set_hint(0);
tl->set_size(tc->size());
tl->link_head(tc);
tl->link_tail(tc);
tl->set_count(1);
- tl->init_statistics(true /* split_birth */);
- tl->set_parent(NULL);
- tl->set_left(NULL);
- tl->set_right(NULL);
+
return tl;
}
-template <class Chunk>
-TreeList<Chunk>* TreeList<Chunk>::as_TreeList(HeapWord* addr, size_t size) {
- TreeChunk<Chunk>* tc = (TreeChunk<Chunk>*) addr;
- assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "Chunk is too small for a TreeChunk");
- // The space in the heap will have been mangled initially but
- // is not remangled when a free chunk is returned to the free list
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+get_chunk(size_t size, enum FreeBlockDictionary<Chunk_t>::Dither dither) {
+ FreeBlockDictionary<Chunk_t>::verify_par_locked();
+ Chunk_t* res = get_chunk_from_tree(size, dither);
+ assert(res == NULL || res->is_free(),
+ "Should be returning a free chunk");
+ assert(dither != FreeBlockDictionary<Chunk_t>::exactly ||
+ res->size() == size, "Not correct size");
+ return res;
+}
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+TreeList<Chunk_t, FreeList_t>::as_TreeList(HeapWord* addr, size_t size) {
+ TreeChunk<Chunk_t, FreeList_t>* tc = (TreeChunk<Chunk_t, FreeList_t>*) addr;
+ assert((size >= TreeChunk<Chunk_t, FreeList_t>::min_size()),
+ "Chunk is too small for a TreeChunk");
+ // The space will have been mangled initially but
+ // is not remangled when a Chunk_t is returned to the free list
// (since it is used to maintain the chunk on the free list).
- assert((ZapUnusedHeapArea &&
- SpaceMangler::is_mangled((HeapWord*) tc->size_addr()) &&
- SpaceMangler::is_mangled((HeapWord*) tc->prev_addr()) &&
- SpaceMangler::is_mangled((HeapWord*) tc->next_addr())) ||
- (tc->size() == 0 && tc->prev() == NULL && tc->next() == NULL),
- "Space should be clear or mangled");
+ tc->assert_is_mangled();
tc->set_size(size);
tc->link_prev(NULL);
tc->link_next(NULL);
- TreeList<Chunk>* tl = TreeList<Chunk>::as_TreeList(tc);
+ TreeList<Chunk_t, FreeList_t>* tl = TreeList<Chunk_t, FreeList_t>::as_TreeList(tc);
return tl;
}
-template <class Chunk>
-TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk>* tc) {
- TreeList<Chunk>* retTL = this;
- Chunk* list = head();
+#ifndef SERIALGC
+// Specialize for AdaptiveFreeList which tries to avoid
+// splitting a chunk of a size that is under populated in favor of
+// an over populated size. The general get_better_list() just returns
+// the current list.
+template <>
+TreeList<FreeChunk, AdaptiveFreeList>*
+TreeList<FreeChunk, AdaptiveFreeList>::get_better_list(
+ BinaryTreeDictionary<FreeChunk, ::AdaptiveFreeList>* dictionary) {
+ // A candidate chunk has been found. If it is already under
+ // populated, get a chunk associated with the hint for this
+ // chunk.
+
+ TreeList<FreeChunk, ::AdaptiveFreeList>* curTL = this;
+ if (surplus() <= 0) {
+ /* Use the hint to find a size with a surplus, and reset the hint. */
+ TreeList<FreeChunk, ::AdaptiveFreeList>* hintTL = this;
+ while (hintTL->hint() != 0) {
+ assert(hintTL->hint() > hintTL->size(),
+ "hint points in the wrong direction");
+ hintTL = dictionary->find_list(hintTL->hint());
+ assert(curTL != hintTL, "Infinite loop");
+ if (hintTL == NULL ||
+ hintTL == curTL /* Should not happen but protect against it */ ) {
+ // No useful hint. Set the hint to NULL and go on.
+ curTL->set_hint(0);
+ break;
+ }
+ assert(hintTL->size() > curTL->size(), "hint is inconsistent");
+ if (hintTL->surplus() > 0) {
+ // The hint led to a list that has a surplus. Use it.
+ // Set the hint for the candidate to an overpopulated
+ // size.
+ curTL->set_hint(hintTL->size());
+ // Change the candidate.
+ curTL = hintTL;
+ break;
+ }
+ }
+ }
+ return curTL;
+}
+#endif // SERIALGC
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+TreeList<Chunk_t, FreeList_t>::get_better_list(
+ BinaryTreeDictionary<Chunk_t, FreeList_t>* dictionary) {
+ return this;
+}
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::remove_chunk_replace_if_needed(TreeChunk<Chunk_t, FreeList_t>* tc) {
+
+ TreeList<Chunk_t, FreeList_t>* retTL = this;
+ Chunk_t* list = head();
assert(!list || list != list->next(), "Chunk on list twice");
assert(tc != NULL, "Chunk being removed is NULL");
assert(parent() == NULL || this == parent()->left() ||
@@ -112,13 +181,13 @@ TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk
assert(head() == NULL || head()->prev() == NULL, "list invariant");
assert(tail() == NULL || tail()->next() == NULL, "list invariant");
- Chunk* prevFC = tc->prev();
- TreeChunk<Chunk>* nextTC = TreeChunk<Chunk>::as_TreeChunk(tc->next());
+ Chunk_t* prevFC = tc->prev();
+ TreeChunk<Chunk_t, FreeList_t>* nextTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(tc->next());
assert(list != NULL, "should have at least the target chunk");
// Is this the first item on the list?
if (tc == list) {
- // The "getChunk..." functions for a TreeList<Chunk> will not return the
+ // The "getChunk..." functions for a TreeList<Chunk_t, FreeList_t> will not return the
// first chunk in the list unless it is the last chunk in the list
// because the first chunk is also acting as the tree node.
// When coalescing happens, however, the first chunk in the a tree
@@ -127,8 +196,8 @@ TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk
// allocated when the sweeper yields (giving up the free list lock)
// to allow mutator activity. If this chunk is the first in the
// list and is not the last in the list, do the work to copy the
- // TreeList<Chunk> from the first chunk to the next chunk and update all
- // the TreeList<Chunk> pointers in the chunks in the list.
+ // TreeList<Chunk_t, FreeList_t> from the first chunk to the next chunk and update all
+ // the TreeList<Chunk_t, FreeList_t> pointers in the chunks in the list.
if (nextTC == NULL) {
assert(prevFC == NULL, "Not last chunk in the list");
set_tail(NULL);
@@ -141,11 +210,11 @@ TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk
// This can be slow for a long list. Consider having
// an option that does not allow the first chunk on the
// list to be coalesced.
- for (TreeChunk<Chunk>* curTC = nextTC; curTC != NULL;
- curTC = TreeChunk<Chunk>::as_TreeChunk(curTC->next())) {
+ for (TreeChunk<Chunk_t, FreeList_t>* curTC = nextTC; curTC != NULL;
+ curTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(curTC->next())) {
curTC->set_list(retTL);
}
- // Fix the parent to point to the new TreeList<Chunk>.
+ // Fix the parent to point to the new TreeList<Chunk_t, FreeList_t>.
if (retTL->parent() != NULL) {
if (this == retTL->parent()->left()) {
retTL->parent()->set_left(retTL);
@@ -176,9 +245,9 @@ TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk
prevFC->link_after(nextTC);
}
- // Below this point the embeded TreeList<Chunk> being used for the
+ // Below this point the embedded TreeList<Chunk_t, FreeList_t> being used for the
// tree node may have changed. Don't use "this"
- // TreeList<Chunk>*.
+ // TreeList<Chunk_t, FreeList_t>*.
// chunk should still be a free chunk (bit set in _prev)
assert(!retTL->head() || retTL->size() == retTL->head()->size(),
"Wrong sized chunk in list");
@@ -188,7 +257,7 @@ TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk
tc->set_list(NULL);
bool prev_found = false;
bool next_found = false;
- for (Chunk* curFC = retTL->head();
+ for (Chunk_t* curFC = retTL->head();
curFC != NULL; curFC = curFC->next()) {
assert(curFC != tc, "Chunk is still in list");
if (curFC == prevFC) {
@@ -215,8 +284,8 @@ TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk
return retTL;
}
-template <class Chunk>
-void TreeList<Chunk>::return_chunk_at_tail(TreeChunk<Chunk>* chunk) {
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeList<Chunk_t, FreeList_t>::return_chunk_at_tail(TreeChunk<Chunk_t, FreeList_t>* chunk) {
assert(chunk != NULL, "returning NULL chunk");
assert(chunk->list() == this, "list should be set for chunk");
assert(tail() != NULL, "The tree list is embedded in the first chunk");
@@ -225,12 +294,12 @@ void TreeList<Chunk>::return_chunk_at_tail(TreeChunk<Chunk>* chunk) {
assert(head() == NULL || head()->prev() == NULL, "list invariant");
assert(tail() == NULL || tail()->next() == NULL, "list invariant");
- Chunk* fc = tail();
+ Chunk_t* fc = tail();
fc->link_after(chunk);
link_tail(chunk);
assert(!tail() || size() == tail()->size(), "Wrong sized chunk in list");
- increment_count();
+ FreeList_t<Chunk_t>::increment_count();
debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
assert(head() == NULL || head()->prev() == NULL, "list invariant");
assert(tail() == NULL || tail()->next() == NULL, "list invariant");
@@ -238,10 +307,10 @@ void TreeList<Chunk>::return_chunk_at_tail(TreeChunk<Chunk>* chunk) {
// Add this chunk at the head of the list. "At the head of the list"
// is defined to be after the chunk pointer to by head(). This is
-// because the TreeList<Chunk> is embedded in the first TreeChunk<Chunk> in the
-// list. See the definition of TreeChunk<Chunk>.
-template <class Chunk>
-void TreeList<Chunk>::return_chunk_at_head(TreeChunk<Chunk>* chunk) {
+// because the TreeList<Chunk_t, FreeList_t> is embedded in the first TreeChunk<Chunk_t, FreeList_t> in the
+// list. See the definition of TreeChunk<Chunk_t, FreeList_t>.
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeList<Chunk_t, FreeList_t>::return_chunk_at_head(TreeChunk<Chunk_t, FreeList_t>* chunk) {
assert(chunk->list() == this, "list should be set for chunk");
assert(head() != NULL, "The tree list is embedded in the first chunk");
assert(chunk != NULL, "returning NULL chunk");
@@ -249,7 +318,7 @@ void TreeList<Chunk>::return_chunk_at_head(TreeChunk<Chunk>* chunk) {
assert(head() == NULL || head()->prev() == NULL, "list invariant");
assert(tail() == NULL || tail()->next() == NULL, "list invariant");
- Chunk* fc = head()->next();
+ Chunk_t* fc = head()->next();
if (fc != NULL) {
chunk->link_after(fc);
} else {
@@ -258,28 +327,38 @@ void TreeList<Chunk>::return_chunk_at_head(TreeChunk<Chunk>* chunk) {
}
head()->link_after(chunk);
assert(!head() || size() == head()->size(), "Wrong sized chunk in list");
- increment_count();
+ FreeList_t<Chunk_t>::increment_count();
debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
assert(head() == NULL || head()->prev() == NULL, "list invariant");
assert(tail() == NULL || tail()->next() == NULL, "list invariant");
}
-template <class Chunk>
-TreeChunk<Chunk>* TreeList<Chunk>::head_as_TreeChunk() {
- assert(head() == NULL || TreeChunk<Chunk>::as_TreeChunk(head())->list() == this,
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeChunk<Chunk_t, FreeList_t>::assert_is_mangled() const {
+ assert((ZapUnusedHeapArea &&
+ SpaceMangler::is_mangled((HeapWord*) Chunk_t::size_addr()) &&
+ SpaceMangler::is_mangled((HeapWord*) Chunk_t::prev_addr()) &&
+ SpaceMangler::is_mangled((HeapWord*) Chunk_t::next_addr())) ||
+ (size() == 0 && prev() == NULL && next() == NULL),
+ "Space should be clear or mangled");
+}
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::head_as_TreeChunk() {
+ assert(head() == NULL || (TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(head())->list() == this),
"Wrong type of chunk?");
- return TreeChunk<Chunk>::as_TreeChunk(head());
+ return TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(head());
}
-template <class Chunk>
-TreeChunk<Chunk>* TreeList<Chunk>::first_available() {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::first_available() {
assert(head() != NULL, "The head of the list cannot be NULL");
- Chunk* fc = head()->next();
- TreeChunk<Chunk>* retTC;
+ Chunk_t* fc = head()->next();
+ TreeChunk<Chunk_t, FreeList_t>* retTC;
if (fc == NULL) {
retTC = head_as_TreeChunk();
} else {
- retTC = TreeChunk<Chunk>::as_TreeChunk(fc);
+ retTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(fc);
}
assert(retTC->list() == this, "Wrong type of chunk.");
return retTC;
@@ -288,41 +367,32 @@ TreeChunk<Chunk>* TreeList<Chunk>::first_available() {
// Returns the block with the largest heap address amongst
// those in the list for this size; potentially slow and expensive,
// use with caution!
-template <class Chunk>
-TreeChunk<Chunk>* TreeList<Chunk>::largest_address() {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::largest_address() {
assert(head() != NULL, "The head of the list cannot be NULL");
- Chunk* fc = head()->next();
- TreeChunk<Chunk>* retTC;
+ Chunk_t* fc = head()->next();
+ TreeChunk<Chunk_t, FreeList_t>* retTC;
if (fc == NULL) {
retTC = head_as_TreeChunk();
} else {
// walk down the list and return the one with the highest
// heap address among chunks of this size.
- Chunk* last = fc;
+ Chunk_t* last = fc;
while (fc->next() != NULL) {
if ((HeapWord*)last < (HeapWord*)fc) {
last = fc;
}
fc = fc->next();
}
- retTC = TreeChunk<Chunk>::as_TreeChunk(last);
+ retTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(last);
}
assert(retTC->list() == this, "Wrong type of chunk.");
return retTC;
}
-template <class Chunk>
-BinaryTreeDictionary<Chunk>::BinaryTreeDictionary(bool adaptive_freelists, bool splay) :
- _splay(splay), _adaptive_freelists(adaptive_freelists),
- _total_size(0), _total_free_blocks(0), _root(0) {}
-
-template <class Chunk>
-BinaryTreeDictionary<Chunk>::BinaryTreeDictionary(MemRegion mr,
- bool adaptive_freelists,
- bool splay):
- _adaptive_freelists(adaptive_freelists), _splay(splay)
-{
- assert(mr.word_size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
+template <class Chunk_t, template <class> class FreeList_t>
+BinaryTreeDictionary<Chunk_t, FreeList_t>::BinaryTreeDictionary(MemRegion mr) {
+ assert((mr.byte_size() > min_size()), "minimum chunk size");
reset(mr);
assert(root()->left() == NULL, "reset check failed");
@@ -333,52 +403,48 @@ BinaryTreeDictionary<Chunk>::BinaryTreeDictionary(MemRegion mr,
assert(total_free_blocks() == 1, "reset check failed");
}
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::inc_total_size(size_t inc) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::inc_total_size(size_t inc) {
_total_size = _total_size + inc;
}
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::dec_total_size(size_t dec) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::dec_total_size(size_t dec) {
_total_size = _total_size - dec;
}
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::reset(MemRegion mr) {
- assert(mr.word_size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
- set_root(TreeList<Chunk>::as_TreeList(mr.start(), mr.word_size()));
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::reset(MemRegion mr) {
+ assert((mr.byte_size() > min_size()), "minimum chunk size");
+ set_root(TreeList<Chunk_t, FreeList_t>::as_TreeList(mr.start(), mr.word_size()));
set_total_size(mr.word_size());
set_total_free_blocks(1);
}
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::reset(HeapWord* addr, size_t byte_size) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::reset(HeapWord* addr, size_t byte_size) {
MemRegion mr(addr, heap_word_size(byte_size));
reset(mr);
}
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::reset() {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::reset() {
set_root(NULL);
set_total_size(0);
set_total_free_blocks(0);
}
// Get a free block of size at least size from tree, or NULL.
-// If a splay step is requested, the removal algorithm (only) incorporates
-// a splay step as follows:
-// . the search proceeds down the tree looking for a possible
-// match. At the (closest) matching location, an appropriate splay step is applied
-// (zig, zig-zig or zig-zag). A chunk of the appropriate size is then returned
-// if available, and if it's the last chunk, the node is deleted. A deteleted
-// node is replaced in place by its tree successor.
-template <class Chunk>
-TreeChunk<Chunk>*
-BinaryTreeDictionary<Chunk>::get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither, bool splay)
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>*
+BinaryTreeDictionary<Chunk_t, FreeList_t>::get_chunk_from_tree(
+ size_t size,
+ enum FreeBlockDictionary<Chunk_t>::Dither dither)
{
- TreeList<Chunk> *curTL, *prevTL;
- TreeChunk<Chunk>* retTC = NULL;
- assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
+ TreeList<Chunk_t, FreeList_t> *curTL, *prevTL;
+ TreeChunk<Chunk_t, FreeList_t>* retTC = NULL;
+
+ assert((size >= min_size()), "minimum chunk size");
if (FLSVerifyDictionary) {
verify_tree();
}
@@ -398,7 +464,7 @@ BinaryTreeDictionary<Chunk>::get_chunk_from_tree(size_t size, enum FreeBlockDict
}
if (curTL == NULL) { // couldn't find exact match
- if (dither == FreeBlockDictionary<Chunk>::exactly) return NULL;
+ if (dither == FreeBlockDictionary<Chunk_t>::exactly) return NULL;
// try and find the next larger size by walking back up the search path
for (curTL = prevTL; curTL != NULL;) {
@@ -410,46 +476,9 @@ BinaryTreeDictionary<Chunk>::get_chunk_from_tree(size_t size, enum FreeBlockDict
}
if (curTL != NULL) {
assert(curTL->size() >= size, "size inconsistency");
- if (adaptive_freelists()) {
-
- // A candidate chunk has been found. If it is already under
- // populated, get a chunk associated with the hint for this
- // chunk.
- if (curTL->surplus() <= 0) {
- /* Use the hint to find a size with a surplus, and reset the hint. */
- TreeList<Chunk>* hintTL = curTL;
- while (hintTL->hint() != 0) {
- assert(hintTL->hint() == 0 || hintTL->hint() > hintTL->size(),
- "hint points in the wrong direction");
- hintTL = find_list(hintTL->hint());
- assert(curTL != hintTL, "Infinite loop");
- if (hintTL == NULL ||
- hintTL == curTL /* Should not happen but protect against it */ ) {
- // No useful hint. Set the hint to NULL and go on.
- curTL->set_hint(0);
- break;
- }
- assert(hintTL->size() > size, "hint is inconsistent");
- if (hintTL->surplus() > 0) {
- // The hint led to a list that has a surplus. Use it.
- // Set the hint for the candidate to an overpopulated
- // size.
- curTL->set_hint(hintTL->size());
- // Change the candidate.
- curTL = hintTL;
- break;
- }
- // The evm code reset the hint of the candidate as
- // at an interim point. Why? Seems like this leaves
- // the hint pointing to a list that didn't work.
- // curTL->set_hint(hintTL->size());
- }
- }
- }
- // don't waste time splaying if chunk's singleton
- if (splay && curTL->head()->next() != NULL) {
- semi_splay_step(curTL);
- }
+
+ curTL = curTL->get_better_list(this);
+
retTC = curTL->first_available();
assert((retTC != NULL) && (curTL->count() > 0),
"A list in the binary tree should not be NULL");
@@ -465,9 +494,9 @@ BinaryTreeDictionary<Chunk>::get_chunk_from_tree(size_t size, enum FreeBlockDict
return retTC;
}
-template <class Chunk>
-TreeList<Chunk>* BinaryTreeDictionary<Chunk>::find_list(size_t size) const {
- TreeList<Chunk>* curTL;
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>* BinaryTreeDictionary<Chunk_t, FreeList_t>::find_list(size_t size) const {
+ TreeList<Chunk_t, FreeList_t>* curTL;
for (curTL = root(); curTL != NULL;) {
if (curTL->size() == size) { // exact match
break;
@@ -484,10 +513,10 @@ TreeList<Chunk>* BinaryTreeDictionary<Chunk>::find_list(size_t size) const {
}
-template <class Chunk>
-bool BinaryTreeDictionary<Chunk>::verify_chunk_in_free_list(Chunk* tc) const {
+template <class Chunk_t, template <class> class FreeList_t>
+bool BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_chunk_in_free_list(Chunk_t* tc) const {
size_t size = tc->size();
- TreeList<Chunk>* tl = find_list(size);
+ TreeList<Chunk_t, FreeList_t>* tl = find_list(size);
if (tl == NULL) {
return false;
} else {
@@ -495,9 +524,9 @@ bool BinaryTreeDictionary<Chunk>::verify_chunk_in_free_list(Chunk* tc) const {
}
}
-template <class Chunk>
-Chunk* BinaryTreeDictionary<Chunk>::find_largest_dict() const {
- TreeList<Chunk> *curTL = root();
+template <class Chunk_t, template <class> class FreeList_t>
+Chunk_t* BinaryTreeDictionary<Chunk_t, FreeList_t>::find_largest_dict() const {
+ TreeList<Chunk_t, FreeList_t> *curTL = root();
if (curTL != NULL) {
while(curTL->right() != NULL) curTL = curTL->right();
return curTL->largest_address();
@@ -510,15 +539,15 @@ Chunk* BinaryTreeDictionary<Chunk>::find_largest_dict() const {
// chunk in a list on a tree node, just unlink it.
// If it is the last chunk in the list (the next link is NULL),
// remove the node and repair the tree.
-template <class Chunk>
-TreeChunk<Chunk>*
-BinaryTreeDictionary<Chunk>::remove_chunk_from_tree(TreeChunk<Chunk>* tc) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>*
+BinaryTreeDictionary<Chunk_t, FreeList_t>::remove_chunk_from_tree(TreeChunk<Chunk_t, FreeList_t>* tc) {
assert(tc != NULL, "Should not call with a NULL chunk");
assert(tc->is_free(), "Header is not marked correctly");
- TreeList<Chunk> *newTL, *parentTL;
- TreeChunk<Chunk>* retTC;
- TreeList<Chunk>* tl = tc->list();
+ TreeList<Chunk_t, FreeList_t> *newTL, *parentTL;
+ TreeChunk<Chunk_t, FreeList_t>* retTC;
+ TreeList<Chunk_t, FreeList_t>* tl = tc->list();
debug_only(
bool removing_only_chunk = false;
if (tl == _root) {
@@ -538,8 +567,8 @@ BinaryTreeDictionary<Chunk>::remove_chunk_from_tree(TreeChunk<Chunk>* tc) {
retTC = tc;
// Removing this chunk can have the side effect of changing the node
- // (TreeList<Chunk>*) in the tree. If the node is the root, update it.
- TreeList<Chunk>* replacementTL = tl->remove_chunk_replace_if_needed(tc);
+ // (TreeList<Chunk_t, FreeList_t>*) in the tree. If the node is the root, update it.
+ TreeList<Chunk_t, FreeList_t>* replacementTL = tl->remove_chunk_replace_if_needed(tc);
assert(tc->is_free(), "Chunk should still be free");
assert(replacementTL->parent() == NULL ||
replacementTL == replacementTL->parent()->left() ||
@@ -549,17 +578,18 @@ BinaryTreeDictionary<Chunk>::remove_chunk_from_tree(TreeChunk<Chunk>* tc) {
assert(replacementTL->parent() == NULL, "Incorrectly replacing root");
set_root(replacementTL);
}
- debug_only(
+#ifdef ASSERT
if (tl != replacementTL) {
assert(replacementTL->head() != NULL,
"If the tree list was replaced, it should not be a NULL list");
- TreeList<Chunk>* rhl = replacementTL->head_as_TreeChunk()->list();
- TreeList<Chunk>* rtl = TreeChunk<Chunk>::as_TreeChunk(replacementTL->tail())->list();
+ TreeList<Chunk_t, FreeList_t>* rhl = replacementTL->head_as_TreeChunk()->list();
+ TreeList<Chunk_t, FreeList_t>* rtl =
+ TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(replacementTL->tail())->list();
assert(rhl == replacementTL, "Broken head");
assert(rtl == replacementTL, "Broken tail");
assert(replacementTL->size() == tc->size(), "Broken size");
}
- )
+#endif
// Does the tree need to be repaired?
if (replacementTL->count() == 0) {
@@ -574,7 +604,7 @@ BinaryTreeDictionary<Chunk>::remove_chunk_from_tree(TreeChunk<Chunk>* tc) {
} else if (replacementTL->right() == NULL) {
// right is NULL
newTL = replacementTL->left();
- debug_only(replacementTL->clearLeft();)
+ debug_only(replacementTL->clear_left();)
} else { // we have both children, so, by patriarchal convention,
// my replacement is least node in right sub-tree
complicated_splice = true;
@@ -623,7 +653,7 @@ BinaryTreeDictionary<Chunk>::remove_chunk_from_tree(TreeChunk<Chunk>* tc) {
newTL->set_right(replacementTL->right());
debug_only(
replacementTL->clear_right();
- replacementTL->clearLeft();
+ replacementTL->clear_left();
)
}
assert(replacementTL->right() == NULL &&
@@ -644,21 +674,21 @@ BinaryTreeDictionary<Chunk>::remove_chunk_from_tree(TreeChunk<Chunk>* tc) {
verify_tree();
}
assert(!removing_only_chunk || _root == NULL, "root should be NULL");
- return TreeChunk<Chunk>::as_TreeChunk(retTC);
+ return TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(retTC);
}
// Remove the leftmost node (lm) in the tree and return it.
// If lm has a right child, link it to the left node of
// the parent of lm.
-template <class Chunk>
-TreeList<Chunk>* BinaryTreeDictionary<Chunk>::remove_tree_minimum(TreeList<Chunk>* tl) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>* BinaryTreeDictionary<Chunk_t, FreeList_t>::remove_tree_minimum(TreeList<Chunk_t, FreeList_t>* tl) {
assert(tl != NULL && tl->parent() != NULL, "really need a proper sub-tree");
// locate the subtree minimum by walking down left branches
- TreeList<Chunk>* curTL = tl;
+ TreeList<Chunk_t, FreeList_t>* curTL = tl;
for (; curTL->left() != NULL; curTL = curTL->left());
// obviously curTL now has at most one child, a right child
if (curTL != root()) { // Should this test just be removed?
- TreeList<Chunk>* parentTL = curTL->parent();
+ TreeList<Chunk_t, FreeList_t>* parentTL = curTL->parent();
if (parentTL->left() == curTL) { // curTL is a left child
parentTL->set_left(curTL->right());
} else {
@@ -685,31 +715,14 @@ TreeList<Chunk>* BinaryTreeDictionary<Chunk>::remove_tree_minimum(TreeList<Chunk
return curTL;
}
-// Based on a simplification of the algorithm by Sleator and Tarjan (JACM 1985).
-// The simplifications are the following:
-// . we splay only when we delete (not when we insert)
-// . we apply a single spay step per deletion/access
-// By doing such partial splaying, we reduce the amount of restructuring,
-// while getting a reasonably efficient search tree (we think).
-// [Measurements will be needed to (in)validate this expectation.]
-
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::semi_splay_step(TreeList<Chunk>* tc) {
- // apply a semi-splay step at the given node:
- // . if root, norting needs to be done
- // . if child of root, splay once
- // . else zig-zig or sig-zag depending on path from grandparent
- if (root() == tc) return;
- warning("*** Splaying not yet implemented; "
- "tree operations may be inefficient ***");
-}
-
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::insert_chunk_in_tree(Chunk* fc) {
- TreeList<Chunk> *curTL, *prevTL;
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::insert_chunk_in_tree(Chunk_t* fc) {
+ TreeList<Chunk_t, FreeList_t> *curTL, *prevTL;
size_t size = fc->size();
- assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "too small to be a TreeList<Chunk>");
+ assert((size >= min_size()),
+ err_msg(SIZE_FORMAT " is too small to be a TreeChunk<Chunk_t, FreeList_t> " SIZE_FORMAT,
+ size, min_size()));
if (FLSVerifyDictionary) {
verify_tree();
}
@@ -729,9 +742,9 @@ void BinaryTreeDictionary<Chunk>::insert_chunk_in_tree(Chunk* fc) {
curTL = curTL->right();
}
}
- TreeChunk<Chunk>* tc = TreeChunk<Chunk>::as_TreeChunk(fc);
+ TreeChunk<Chunk_t, FreeList_t>* tc = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(fc);
// This chunk is being returned to the binary tree. Its embedded
- // TreeList<Chunk> should be unused at this point.
+ // TreeList<Chunk_t, FreeList_t> should be unused at this point.
tc->initialize();
if (curTL != NULL) { // exact match
tc->set_list(curTL);
@@ -739,8 +752,8 @@ void BinaryTreeDictionary<Chunk>::insert_chunk_in_tree(Chunk* fc) {
} else { // need a new node in tree
tc->clear_next();
tc->link_prev(NULL);
- TreeList<Chunk>* newTL = TreeList<Chunk>::as_TreeList(tc);
- assert(((TreeChunk<Chunk>*)tc)->list() == newTL,
+ TreeList<Chunk_t, FreeList_t>* newTL = TreeList<Chunk_t, FreeList_t>::as_TreeList(tc);
+ assert(((TreeChunk<Chunk_t, FreeList_t>*)tc)->list() == newTL,
"List was not initialized correctly");
if (prevTL == NULL) { // we are the only tree node
assert(root() == NULL, "control point invariant");
@@ -768,30 +781,30 @@ void BinaryTreeDictionary<Chunk>::insert_chunk_in_tree(Chunk* fc) {
}
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::max_chunk_size() const {
- FreeBlockDictionary<Chunk>::verify_par_locked();
- TreeList<Chunk>* tc = root();
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::max_chunk_size() const {
+ FreeBlockDictionary<Chunk_t>::verify_par_locked();
+ TreeList<Chunk_t, FreeList_t>* tc = root();
if (tc == NULL) return 0;
for (; tc->right() != NULL; tc = tc->right());
return tc->size();
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_list_length(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_list_length(TreeList<Chunk_t, FreeList_t>* tl) const {
size_t res;
res = tl->count();
#ifdef ASSERT
size_t cnt;
- Chunk* tc = tl->head();
+ Chunk_t* tc = tl->head();
for (cnt = 0; tc != NULL; tc = tc->next(), cnt++);
assert(res == cnt, "The count is not being maintained correctly");
#endif
return res;
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_size_in_tree(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_size_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const {
if (tl == NULL)
return 0;
return (tl->size() * total_list_length(tl)) +
@@ -799,8 +812,8 @@ size_t BinaryTreeDictionary<Chunk>::total_size_in_tree(TreeList<Chunk>* tl) cons
total_size_in_tree(tl->right());
}
-template <class Chunk>
-double BinaryTreeDictionary<Chunk>::sum_of_squared_block_sizes(TreeList<Chunk>* const tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+double BinaryTreeDictionary<Chunk_t, FreeList_t>::sum_of_squared_block_sizes(TreeList<Chunk_t, FreeList_t>* const tl) const {
if (tl == NULL) {
return 0.0;
}
@@ -811,8 +824,8 @@ double BinaryTreeDictionary<Chunk>::sum_of_squared_block_sizes(TreeList<Chunk>*
return curr;
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_free_blocks_in_tree(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_free_blocks_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const {
if (tl == NULL)
return 0;
return total_list_length(tl) +
@@ -820,28 +833,28 @@ size_t BinaryTreeDictionary<Chunk>::total_free_blocks_in_tree(TreeList<Chunk>* t
total_free_blocks_in_tree(tl->right());
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::num_free_blocks() const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::num_free_blocks() const {
assert(total_free_blocks_in_tree(root()) == total_free_blocks(),
"_total_free_blocks inconsistency");
return total_free_blocks();
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::tree_height_helper(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::tree_height_helper(TreeList<Chunk_t, FreeList_t>* tl) const {
if (tl == NULL)
return 0;
return 1 + MAX2(tree_height_helper(tl->left()),
tree_height_helper(tl->right()));
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::treeHeight() const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::tree_height() const {
return tree_height_helper(root());
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_nodes_helper(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_nodes_helper(TreeList<Chunk_t, FreeList_t>* tl) const {
if (tl == NULL) {
return 0;
}
@@ -849,14 +862,18 @@ size_t BinaryTreeDictionary<Chunk>::total_nodes_helper(TreeList<Chunk>* tl) cons
total_nodes_helper(tl->right());
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_nodes_in_tree(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_nodes_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const {
return total_nodes_helper(root());
}
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::dict_census_udpate(size_t size, bool split, bool birth){
- TreeList<Chunk>* nd = find_list(size);
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::dict_census_update(size_t size, bool split, bool birth){}
+
+#ifndef SERIALGC
+template <>
+void BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>::dict_census_update(size_t size, bool split, bool birth){
+ TreeList<FreeChunk, AdaptiveFreeList>* nd = find_list(size);
if (nd) {
if (split) {
if (birth) {
@@ -882,16 +899,26 @@ void BinaryTreeDictionary<Chunk>::dict_census_udpate(size_t size, bool split, bo
// This is a birth associated with a LinAB. The chunk
// for the LinAB is not in the dictionary.
}
+#endif // SERIALGC
+
+template <class Chunk_t, template <class> class FreeList_t>
+bool BinaryTreeDictionary<Chunk_t, FreeList_t>::coal_dict_over_populated(size_t size) {
+ // For the general type of freelists, encourage coalescing by
+ // returning true.
+ return true;
+}
-template <class Chunk>
-bool BinaryTreeDictionary<Chunk>::coal_dict_over_populated(size_t size) {
+#ifndef SERIALGC
+template <>
+bool BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>::coal_dict_over_populated(size_t size) {
if (FLSAlwaysCoalesceLarge) return true;
- TreeList<Chunk>* list_of_size = find_list(size);
+ TreeList<FreeChunk, AdaptiveFreeList>* list_of_size = find_list(size);
// None of requested size implies overpopulated.
return list_of_size == NULL || list_of_size->coal_desired() <= 0 ||
list_of_size->count() > list_of_size->coal_desired();
}
+#endif // SERIALGC
// Closures for walking the binary tree.
// do_list() walks the free list in a node applying the closure
@@ -899,19 +926,18 @@ bool BinaryTreeDictionary<Chunk>::coal_dict_over_populated(size_t size) {
// do_tree() walks the nodes in the binary tree applying do_list()
// to each list at each node.
-template <class Chunk>
+template <class Chunk_t, template <class> class FreeList_t>
class TreeCensusClosure : public StackObj {
protected:
- virtual void do_list(FreeList<Chunk>* fl) = 0;
+ virtual void do_list(FreeList_t<Chunk_t>* fl) = 0;
public:
- virtual void do_tree(TreeList<Chunk>* tl) = 0;
+ virtual void do_tree(TreeList<Chunk_t, FreeList_t>* tl) = 0;
};
-template <class Chunk>
-class AscendTreeCensusClosure : public TreeCensusClosure<Chunk> {
- using TreeCensusClosure<Chunk>::do_list;
+template <class Chunk_t, template <class> class FreeList_t>
+class AscendTreeCensusClosure : public TreeCensusClosure<Chunk_t, FreeList_t> {
public:
- void do_tree(TreeList<Chunk>* tl) {
+ void do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
if (tl != NULL) {
do_tree(tl->left());
- do_list(tl);
+ this->do_list(tl);
@@ -920,11 +946,10 @@ class AscendTreeCensusClosure : public TreeCensusClosure<Chunk> {
}
};
-template <class Chunk>
-class DescendTreeCensusClosure : public TreeCensusClosure<Chunk> {
- using TreeCensusClosure<Chunk>::do_list;
+template <class Chunk_t, template <class> class FreeList_t>
+class DescendTreeCensusClosure : public TreeCensusClosure<Chunk_t, FreeList_t> {
public:
- void do_tree(TreeList<Chunk>* tl) {
+ void do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
if (tl != NULL) {
do_tree(tl->right());
- do_list(tl);
+ this->do_list(tl);
@@ -935,8 +960,8 @@ class DescendTreeCensusClosure : public TreeCensusClosure<Chunk> {
// For each list in the tree, calculate the desired, desired
// coalesce, count before sweep, and surplus before sweep.
-template <class Chunk>
-class BeginSweepClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class BeginSweepClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
double _percentage;
float _inter_sweep_current;
float _inter_sweep_estimate;
@@ -951,32 +976,36 @@ class BeginSweepClosure : public AscendTreeCensusClosure<Chunk> {
_inter_sweep_estimate(inter_sweep_estimate),
_intra_sweep_estimate(intra_sweep_estimate) { }
- void do_list(FreeList<Chunk>* fl) {
+ void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+ void do_list(AdaptiveFreeList<Chunk_t>* fl) {
double coalSurplusPercent = _percentage;
fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate);
fl->set_coal_desired((ssize_t)((double)fl->desired() * coalSurplusPercent));
fl->set_before_sweep(fl->count());
fl->set_bfr_surp(fl->surplus());
}
+#endif // SERIALGC
};
// Used to search the tree until a condition is met.
// Similar to TreeCensusClosure but searches the
// tree and returns promptly when found.
-template <class Chunk>
+template <class Chunk_t, template <class> class FreeList_t>
class TreeSearchClosure : public StackObj {
protected:
- virtual bool do_list(FreeList<Chunk>* fl) = 0;
+ virtual bool do_list(FreeList_t<Chunk_t>* fl) = 0;
public:
- virtual bool do_tree(TreeList<Chunk>* tl) = 0;
+ virtual bool do_tree(TreeList<Chunk_t, FreeList_t>* tl) = 0;
};
#if 0 // Don't need this yet but here for symmetry.
-template <class Chunk>
-class AscendTreeSearchClosure : public TreeSearchClosure {
+template <class Chunk_t, template <class> class FreeList_t>
+class AscendTreeSearchClosure : public TreeSearchClosure<Chunk_t, FreeList_t> {
public:
- bool do_tree(TreeList<Chunk>* tl) {
+ bool do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
if (tl != NULL) {
if (do_tree(tl->left())) return true;
- if (do_list(tl)) return true;
+ if (this->do_list(tl)) return true;
@@ -987,11 +1016,10 @@ class AscendTreeSearchClosure : public TreeSearchClosure {
};
#endif
-template <class Chunk>
-class DescendTreeSearchClosure : public TreeSearchClosure<Chunk> {
- using TreeSearchClosure<Chunk>::do_list;
+template <class Chunk_t, template <class> class FreeList_t>
+class DescendTreeSearchClosure : public TreeSearchClosure<Chunk_t, FreeList_t> {
public:
- bool do_tree(TreeList<Chunk>* tl) {
+ bool do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
if (tl != NULL) {
if (do_tree(tl->right())) return true;
- if (do_list(tl)) return true;
+ if (this->do_list(tl)) return true;
@@ -1003,17 +1031,17 @@ class DescendTreeSearchClosure : public TreeSearchClosure<Chunk> {
// Searches the tree for a chunk that ends at the
// specified address.
-template <class Chunk>
-class EndTreeSearchClosure : public DescendTreeSearchClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class EndTreeSearchClosure : public DescendTreeSearchClosure<Chunk_t, FreeList_t> {
HeapWord* _target;
- Chunk* _found;
+ Chunk_t* _found;
public:
EndTreeSearchClosure(HeapWord* target) : _target(target), _found(NULL) {}
- bool do_list(FreeList<Chunk>* fl) {
- Chunk* item = fl->head();
+ bool do_list(FreeList_t<Chunk_t>* fl) {
+ Chunk_t* item = fl->head();
while (item != NULL) {
- if (item->end() == _target) {
+ if (item->end() == (uintptr_t*) _target) {
_found = item;
return true;
}
@@ -1021,22 +1049,22 @@ class EndTreeSearchClosure : public DescendTreeSearchClosure<Chunk> {
}
return false;
}
- Chunk* found() { return _found; }
+ Chunk_t* found() { return _found; }
};
-template <class Chunk>
-Chunk* BinaryTreeDictionary<Chunk>::find_chunk_ends_at(HeapWord* target) const {
- EndTreeSearchClosure<Chunk> etsc(target);
+template <class Chunk_t, template <class> class FreeList_t>
+Chunk_t* BinaryTreeDictionary<Chunk_t, FreeList_t>::find_chunk_ends_at(HeapWord* target) const {
+ EndTreeSearchClosure<Chunk_t, FreeList_t> etsc(target);
bool found_target = etsc.do_tree(root());
assert(found_target || etsc.found() == NULL, "Consistency check");
assert(!found_target || etsc.found() != NULL, "Consistency check");
return etsc.found();
}
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::begin_sweep_dict_census(double coalSurplusPercent,
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::begin_sweep_dict_census(double coalSurplusPercent,
float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) {
- BeginSweepClosure<Chunk> bsc(coalSurplusPercent, inter_sweep_current,
+ BeginSweepClosure<Chunk_t, FreeList_t> bsc(coalSurplusPercent, inter_sweep_current,
inter_sweep_estimate,
intra_sweep_estimate);
bsc.do_tree(root());
@@ -1045,84 +1073,91 @@ void BinaryTreeDictionary<Chunk>::begin_sweep_dict_census(double coalSurplusPerc
// Closures and methods for calculating total bytes returned to the
// free lists in the tree.
#ifndef PRODUCT
-template <class Chunk>
-class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
public:
- void do_list(FreeList<Chunk>* fl) {
+ void do_list(FreeList_t<Chunk_t>* fl) {
fl->set_returned_bytes(0);
}
};
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::initialize_dict_returned_bytes() {
- InitializeDictReturnedBytesClosure<Chunk> idrb;
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::initialize_dict_returned_bytes() {
+ InitializeDictReturnedBytesClosure<Chunk_t, FreeList_t> idrb;
idrb.do_tree(root());
}
-template <class Chunk>
-class ReturnedBytesClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class ReturnedBytesClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
size_t _dict_returned_bytes;
public:
ReturnedBytesClosure() { _dict_returned_bytes = 0; }
- void do_list(FreeList<Chunk>* fl) {
+ void do_list(FreeList_t<Chunk_t>* fl) {
_dict_returned_bytes += fl->returned_bytes();
}
size_t dict_returned_bytes() { return _dict_returned_bytes; }
};
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::sum_dict_returned_bytes() {
- ReturnedBytesClosure<Chunk> rbc;
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::sum_dict_returned_bytes() {
+ ReturnedBytesClosure<Chunk_t, FreeList_t> rbc;
rbc.do_tree(root());
return rbc.dict_returned_bytes();
}
// Count the number of entries in the tree.
-template <class Chunk>
-class treeCountClosure : public DescendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class treeCountClosure : public DescendTreeCensusClosure<Chunk_t, FreeList_t> {
public:
uint count;
treeCountClosure(uint c) { count = c; }
- void do_list(FreeList<Chunk>* fl) {
+ void do_list(FreeList_t<Chunk_t>* fl) {
count++;
}
};
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_count() {
- treeCountClosure<Chunk> ctc(0);
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_count() {
+ treeCountClosure<Chunk_t, FreeList_t> ctc(0);
ctc.do_tree(root());
return ctc.count;
}
#endif // PRODUCT
// Calculate surpluses for the lists in the tree.
-template <class Chunk>
-class setTreeSurplusClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class setTreeSurplusClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
double percentage;
public:
setTreeSurplusClosure(double v) { percentage = v; }
- void do_list(FreeList<Chunk>* fl) {
+ void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+ void do_list(AdaptiveFreeList<Chunk_t>* fl) {
double splitSurplusPercent = percentage;
fl->set_surplus(fl->count() -
(ssize_t)((double)fl->desired() * splitSurplusPercent));
}
+#endif // SERIALGC
};
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::set_tree_surplus(double splitSurplusPercent) {
- setTreeSurplusClosure<Chunk> sts(splitSurplusPercent);
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::set_tree_surplus(double splitSurplusPercent) {
+ setTreeSurplusClosure<Chunk_t, FreeList_t> sts(splitSurplusPercent);
sts.do_tree(root());
}
// Set hints for the lists in the tree.
-template <class Chunk>
-class setTreeHintsClosure : public DescendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class setTreeHintsClosure : public DescendTreeCensusClosure<Chunk_t, FreeList_t> {
size_t hint;
public:
setTreeHintsClosure(size_t v) { hint = v; }
- void do_list(FreeList<Chunk>* fl) {
+ void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+ void do_list(AdaptiveFreeList<Chunk_t>* fl) {
fl->set_hint(hint);
assert(fl->hint() == 0 || fl->hint() > fl->size(),
"Current hint is inconsistent");
@@ -1130,35 +1165,40 @@ class setTreeHintsClosure : public DescendTreeCensusClosure<Chunk> {
hint = fl->size();
}
}
+#endif // SERIALGC
};
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::set_tree_hints(void) {
- setTreeHintsClosure<Chunk> sth(0);
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::set_tree_hints(void) {
+ setTreeHintsClosure<Chunk_t, FreeList_t> sth(0);
sth.do_tree(root());
}
// Save count before previous sweep and splits and coalesces.
-template <class Chunk>
-class clearTreeCensusClosure : public AscendTreeCensusClosure<Chunk> {
- void do_list(FreeList<Chunk>* fl) {
+template <class Chunk_t, template <class> class FreeList_t>
+class clearTreeCensusClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
+ void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+ void do_list(AdaptiveFreeList<Chunk_t>* fl) {
fl->set_prev_sweep(fl->count());
fl->set_coal_births(0);
fl->set_coal_deaths(0);
fl->set_split_births(0);
fl->set_split_deaths(0);
}
+#endif // SERIALGC
};
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::clear_tree_census(void) {
- clearTreeCensusClosure<Chunk> ctc;
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::clear_tree_census(void) {
+ clearTreeCensusClosure<Chunk_t, FreeList_t> ctc;
ctc.do_tree(root());
}
// Do reporting and post sweep clean up.
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::end_sweep_dict_census(double splitSurplusPercent) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::end_sweep_dict_census(double splitSurplusPercent) {
// Does walking the tree 3 times hurt?
set_tree_surplus(splitSurplusPercent);
set_tree_hints();
@@ -1169,9 +1209,9 @@ void BinaryTreeDictionary<Chunk>::end_sweep_dict_census(double splitSurplusPerce
}
// Print summary statistics
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::report_statistics() const {
- FreeBlockDictionary<Chunk>::verify_par_locked();
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::report_statistics() const {
+ FreeBlockDictionary<Chunk_t>::verify_par_locked();
gclog_or_tty->print("Statistics for BinaryTreeDictionary:\n"
"------------------------------------\n");
size_t total_size = total_chunk_size(debug_only(NULL));
@@ -1182,36 +1222,47 @@ void BinaryTreeDictionary<Chunk>::report_statistics() const {
if (free_blocks > 0) {
gclog_or_tty->print("Av. Block Size: %d\n", total_size/free_blocks);
}
- gclog_or_tty->print("Tree Height: %d\n", treeHeight());
+ gclog_or_tty->print("Tree Height: %d\n", tree_height());
}
// Print census information - counts, births, deaths, etc.
// for each list in the tree. Also print some summary
// information.
-template <class Chunk>
-class PrintTreeCensusClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class PrintTreeCensusClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
int _print_line;
size_t _total_free;
- FreeList<Chunk> _total;
+ FreeList_t<Chunk_t> _total;
public:
PrintTreeCensusClosure() {
_print_line = 0;
_total_free = 0;
}
- FreeList<Chunk>* total() { return &_total; }
+ FreeList_t<Chunk_t>* total() { return &_total; }
size_t total_free() { return _total_free; }
- void do_list(FreeList<Chunk>* fl) {
+ void do_list(FreeList<Chunk_t>* fl) {
if (++_print_line >= 40) {
- FreeList<Chunk>::print_labels_on(gclog_or_tty, "size");
+ FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, "size");
_print_line = 0;
}
fl->print_on(gclog_or_tty);
_total_free += fl->count() * fl->size() ;
total()->set_count( total()->count() + fl->count() );
- total()->set_bfr_surp( total()->bfr_surp() + fl->bfr_surp() );
+ }
+
+#ifndef SERIALGC
+ void do_list(AdaptiveFreeList<Chunk_t>* fl) {
+ if (++_print_line >= 40) {
+ FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, "size");
+ _print_line = 0;
+ }
+ fl->print_on(gclog_or_tty);
+ _total_free += fl->count() * fl->size() ;
+ total()->set_count( total()->count() + fl->count() );
+ total()->set_bfr_surp( total()->bfr_surp() + fl->bfr_surp() );
- total()->set_surplus( total()->split_deaths() + fl->surplus() );
+ total()->set_surplus( total()->surplus() + fl->surplus() );
+ total()->set_desired( total()->desired() + fl->desired() );
total()->set_prev_sweep( total()->prev_sweep() + fl->prev_sweep() );
total()->set_before_sweep(total()->before_sweep() + fl->before_sweep());
total()->set_coal_births( total()->coal_births() + fl->coal_births() );
@@ -1219,18 +1270,32 @@ class PrintTreeCensusClosure : public AscendTreeCensusClosure<Chunk> {
total()->set_split_births(total()->split_births() + fl->split_births());
total()->set_split_deaths(total()->split_deaths() + fl->split_deaths());
}
+#endif // SERIALGC
};
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::print_dict_census(void) const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::print_dict_census(void) const {
gclog_or_tty->print("\nBinaryTree\n");
- FreeList<Chunk>::print_labels_on(gclog_or_tty, "size");
- PrintTreeCensusClosure<Chunk> ptc;
+ FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, "size");
+ PrintTreeCensusClosure<Chunk_t, FreeList_t> ptc;
ptc.do_tree(root());
- FreeList<Chunk>* total = ptc.total();
- FreeList<Chunk>::print_labels_on(gclog_or_tty, " ");
+ FreeList_t<Chunk_t>* total = ptc.total();
+ FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, " ");
+}
+
+#ifndef SERIALGC
+template <>
+void BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>::print_dict_census(void) const {
+
+ gclog_or_tty->print("\nBinaryTree\n");
+ AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+ PrintTreeCensusClosure<FreeChunk, AdaptiveFreeList> ptc;
+ ptc.do_tree(root());
+
+ AdaptiveFreeList<FreeChunk>* total = ptc.total();
+ AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, " ");
total->print_on(gclog_or_tty, "TOTAL\t");
gclog_or_tty->print(
"total_free(words): " SIZE_FORMAT_W(16)
@@ -1242,9 +1307,10 @@ void BinaryTreeDictionary<Chunk>::print_dict_census(void) const {
(double)(total->desired() - total->count())
/(total->desired() != 0 ? (double)total->desired() : 1.0));
}
+#endif // SERIALGC
-template <class Chunk>
-class PrintFreeListsClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class PrintFreeListsClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
outputStream* _st;
int _print_line;
@@ -1253,14 +1319,14 @@ class PrintFreeListsClosure : public AscendTreeCensusClosure<Chunk> {
_st = st;
_print_line = 0;
}
- void do_list(FreeList<Chunk>* fl) {
+ void do_list(FreeList_t<Chunk_t>* fl) {
if (++_print_line >= 40) {
- FreeList<Chunk>::print_labels_on(_st, "size");
+ FreeList_t<Chunk_t>::print_labels_on(_st, "size");
_print_line = 0;
}
fl->print_on(gclog_or_tty);
size_t sz = fl->size();
- for (Chunk* fc = fl->head(); fc != NULL;
+ for (Chunk_t* fc = fl->head(); fc != NULL;
fc = fc->next()) {
_st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ") %s",
fc, (HeapWord*)fc + sz,
@@ -1269,11 +1335,11 @@ class PrintFreeListsClosure : public AscendTreeCensusClosure<Chunk> {
}
};
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::print_free_lists(outputStream* st) const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::print_free_lists(outputStream* st) const {
- FreeList<Chunk>::print_labels_on(st, "size");
- PrintFreeListsClosure<Chunk> pflc(st);
+ FreeList_t<Chunk_t>::print_labels_on(st, "size");
+ PrintFreeListsClosure<Chunk_t, FreeList_t> pflc(st);
pflc.do_tree(root());
}
@@ -1281,18 +1347,18 @@ void BinaryTreeDictionary<Chunk>::print_free_lists(outputStream* st) const {
// . _root has no parent
// . parent and child point to each other
// . each node's key correctly related to that of its child(ren)
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::verify_tree() const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_tree() const {
guarantee(root() == NULL || total_free_blocks() == 0 ||
total_size() != 0, "_total_size should't be 0?");
guarantee(root() == NULL || root()->parent() == NULL, "_root shouldn't have parent");
verify_tree_helper(root());
}
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::verify_prev_free_ptrs(TreeList<Chunk>* tl) {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_prev_free_ptrs(TreeList<Chunk_t, FreeList_t>* tl) {
size_t ct = 0;
- for (Chunk* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) {
+ for (Chunk_t* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) {
ct++;
assert(curFC->prev() == NULL || curFC->prev()->is_free(),
"Chunk should be free");
@@ -1303,8 +1369,8 @@ size_t BinaryTreeDictionary<Chunk>::verify_prev_free_ptrs(TreeList<Chunk>* tl) {
// Note: this helper is recursive rather than iterative, so use with
// caution on very deep trees; and watch out for stack overflow errors;
// In general, to be used only for debugging.
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::verify_tree_helper(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_tree_helper(TreeList<Chunk_t, FreeList_t>* tl) const {
if (tl == NULL)
return;
guarantee(tl->size() != 0, "A list must has a size");
@@ -1332,15 +1398,25 @@ void BinaryTreeDictionary<Chunk>::verify_tree_helper(TreeList<Chunk>* tl) const
verify_tree_helper(tl->right());
}
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::verify() const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::verify() const {
verify_tree();
guarantee(total_size() == total_size_in_tree(root()), "Total Size inconsistency");
}
+template class TreeList<Metablock, FreeList>;
+template class BinaryTreeDictionary<Metablock, FreeList>;
+template class TreeChunk<Metablock, FreeList>;
+
+template class TreeList<Metachunk, FreeList>;
+template class BinaryTreeDictionary<Metachunk, FreeList>;
+template class TreeChunk<Metachunk, FreeList>;
+
+
#ifndef SERIALGC
// Explicitly instantiate these types for FreeChunk.
-template class BinaryTreeDictionary<FreeChunk>;
-template class TreeChunk<FreeChunk>;
-template class TreeList<FreeChunk>;
+template class TreeList<FreeChunk, AdaptiveFreeList>;
+template class BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>;
+template class TreeChunk<FreeChunk, AdaptiveFreeList>;
+
#endif // SERIALGC
diff --git a/src/share/vm/memory/binaryTreeDictionary.hpp b/src/share/vm/memory/binaryTreeDictionary.hpp
index 4ab60c2dc..757eb4fda 100644
--- a/src/share/vm/memory/binaryTreeDictionary.hpp
+++ b/src/share/vm/memory/binaryTreeDictionary.hpp
@@ -37,77 +37,78 @@
// A TreeList is a FreeList which can be used to maintain a
// binary tree of free lists.
-template <class Chunk> class TreeChunk;
-template <class Chunk> class BinaryTreeDictionary;
-template <class Chunk> class AscendTreeCensusClosure;
-template <class Chunk> class DescendTreeCensusClosure;
-template <class Chunk> class DescendTreeSearchClosure;
-
-template <class Chunk>
-class TreeList: public FreeList<Chunk> {
- friend class TreeChunk<Chunk>;
- friend class BinaryTreeDictionary<Chunk>;
- friend class AscendTreeCensusClosure<Chunk>;
- friend class DescendTreeCensusClosure<Chunk>;
- friend class DescendTreeSearchClosure<Chunk>;
-
- TreeList<Chunk>* _parent;
- TreeList<Chunk>* _left;
- TreeList<Chunk>* _right;
+template <class Chunk_t, template <class> class FreeList_t> class TreeChunk;
+template <class Chunk_t, template <class> class FreeList_t> class BinaryTreeDictionary;
+template <class Chunk_t, template <class> class FreeList_t> class AscendTreeCensusClosure;
+template <class Chunk_t, template <class> class FreeList_t> class DescendTreeCensusClosure;
+template <class Chunk_t, template <class> class FreeList_t> class DescendTreeSearchClosure;
+
+template <class Chunk_t, template <class> class FreeList_t>
+class TreeList : public FreeList_t<Chunk_t> {
+ friend class TreeChunk<Chunk_t, FreeList_t>;
+ friend class BinaryTreeDictionary<Chunk_t, FreeList_t>;
+ friend class AscendTreeCensusClosure<Chunk_t, FreeList_t>;
+ friend class DescendTreeCensusClosure<Chunk_t, FreeList_t>;
+ friend class DescendTreeSearchClosure<Chunk_t, FreeList_t>;
+
+ TreeList<Chunk_t, FreeList_t>* _parent;
+ TreeList<Chunk_t, FreeList_t>* _left;
+ TreeList<Chunk_t, FreeList_t>* _right;
protected:
- TreeList<Chunk>* parent() const { return _parent; }
- TreeList<Chunk>* left() const { return _left; }
- TreeList<Chunk>* right() const { return _right; }
- // Explicitly import these names into our namespace to fix name lookup with templates
- using FreeList<Chunk>::head;
- using FreeList<Chunk>::set_head;
+ TreeList<Chunk_t, FreeList_t>* parent() const { return _parent; }
+ TreeList<Chunk_t, FreeList_t>* left() const { return _left; }
+ TreeList<Chunk_t, FreeList_t>* right() const { return _right; }
- using FreeList<Chunk>::tail;
- using FreeList<Chunk>::set_tail;
- using FreeList<Chunk>::link_tail;
+ // Forwarding wrappers: members of the dependent base FreeList_t<Chunk_t> are not found by unqualified lookup in a template, so re-expose them here.
+ Chunk_t* head() const { return FreeList_t<Chunk_t>::head(); }
+ Chunk_t* tail() const { return FreeList_t<Chunk_t>::tail(); }
+ void set_head(Chunk_t* head) { FreeList_t<Chunk_t>::set_head(head); }
+ void set_tail(Chunk_t* tail) { FreeList_t<Chunk_t>::set_tail(tail); }
- using FreeList<Chunk>::increment_count;
- NOT_PRODUCT(using FreeList<Chunk>::increment_returned_bytes_by;)
- using FreeList<Chunk>::verify_chunk_in_free_list;
- using FreeList<Chunk>::size;
+ size_t size() const { return FreeList_t<Chunk_t>::size(); }
// Accessors for links in tree.
- void set_left(TreeList<Chunk>* tl) {
+ void set_left(TreeList<Chunk_t, FreeList_t>* tl) {
_left = tl;
if (tl != NULL)
tl->set_parent(this);
}
- void set_right(TreeList<Chunk>* tl) {
+ void set_right(TreeList<Chunk_t, FreeList_t>* tl) {
_right = tl;
if (tl != NULL)
tl->set_parent(this);
}
- void set_parent(TreeList<Chunk>* tl) { _parent = tl; }
+ void set_parent(TreeList<Chunk_t, FreeList_t>* tl) { _parent = tl; }
- void clearLeft() { _left = NULL; }
+ void clear_left() { _left = NULL; }
void clear_right() { _right = NULL; }
void clear_parent() { _parent = NULL; }
- void initialize() { clearLeft(); clear_right(), clear_parent(); }
+ void initialize() { clear_left(); clear_right(), clear_parent(); FreeList_t<Chunk_t>::initialize(); }
// For constructing a TreeList from a Tree chunk or
// address and size.
- static TreeList<Chunk>* as_TreeList(TreeChunk<Chunk>* tc);
- static TreeList<Chunk>* as_TreeList(HeapWord* addr, size_t size);
+ TreeList();
+ static TreeList<Chunk_t, FreeList_t>*
+ as_TreeList(TreeChunk<Chunk_t, FreeList_t>* tc);
+ static TreeList<Chunk_t, FreeList_t>* as_TreeList(HeapWord* addr, size_t size);
// Returns the head of the free list as a pointer to a TreeChunk.
- TreeChunk<Chunk>* head_as_TreeChunk();
+ TreeChunk<Chunk_t, FreeList_t>* head_as_TreeChunk();
// Returns the first available chunk in the free list as a pointer
// to a TreeChunk.
- TreeChunk<Chunk>* first_available();
+ TreeChunk<Chunk_t, FreeList_t>* first_available();
// Returns the block with the largest heap address amongst
// those in the list for this size; potentially slow and expensive,
// use with caution!
- TreeChunk<Chunk>* largest_address();
+ TreeChunk<Chunk_t, FreeList_t>* largest_address();
+
+ TreeList<Chunk_t, FreeList_t>* get_better_list(
+ BinaryTreeDictionary<Chunk_t, FreeList_t>* dictionary);
// remove_chunk_replace_if_needed() removes the given "tc" from the TreeList.
// If "tc" is the first chunk in the list, it is also the
@@ -115,10 +116,10 @@ class TreeList: public FreeList<Chunk> {
// returns the possibly replaced TreeList* for the node in
// the tree. It also updates the parent of the original
// node to point to the new node.
- TreeList<Chunk>* remove_chunk_replace_if_needed(TreeChunk<Chunk>* tc);
+ TreeList<Chunk_t, FreeList_t>* remove_chunk_replace_if_needed(TreeChunk<Chunk_t, FreeList_t>* tc);
// See FreeList.
- void return_chunk_at_head(TreeChunk<Chunk>* tc);
- void return_chunk_at_tail(TreeChunk<Chunk>* tc);
+ void return_chunk_at_head(TreeChunk<Chunk_t, FreeList_t>* tc);
+ void return_chunk_at_tail(TreeChunk<Chunk_t, FreeList_t>* tc);
};
// A TreeChunk is a subclass of a Chunk that additionally
@@ -134,52 +135,54 @@ class TreeList: public FreeList<Chunk> {
// on the free list for a node in the tree and is only removed if
// it is the last chunk on the free list.
-template <class Chunk>
-class TreeChunk : public Chunk {
- friend class TreeList<Chunk>;
- TreeList<Chunk>* _list;
- TreeList<Chunk> _embedded_list; // if non-null, this chunk is on _list
+template <class Chunk_t, template <class> class FreeList_t>
+class TreeChunk : public Chunk_t {
+ friend class TreeList<Chunk_t, FreeList_t>;
+ TreeList<Chunk_t, FreeList_t>* _list;
+ TreeList<Chunk_t, FreeList_t> _embedded_list; // if non-null, this chunk is on _list
+
+ static size_t _min_tree_chunk_size;
+
protected:
- TreeList<Chunk>* embedded_list() const { return (TreeList<Chunk>*) &_embedded_list; }
- void set_embedded_list(TreeList<Chunk>* v) { _embedded_list = *v; }
+ TreeList<Chunk_t, FreeList_t>* embedded_list() const { return (TreeList<Chunk_t, FreeList_t>*) &_embedded_list; }
+ void set_embedded_list(TreeList<Chunk_t, FreeList_t>* v) { _embedded_list = *v; }
public:
- TreeList<Chunk>* list() { return _list; }
- void set_list(TreeList<Chunk>* v) { _list = v; }
- static TreeChunk<Chunk>* as_TreeChunk(Chunk* fc);
+ TreeList<Chunk_t, FreeList_t>* list() { return _list; }
+ void set_list(TreeList<Chunk_t, FreeList_t>* v) { _list = v; }
+ static TreeChunk<Chunk_t, FreeList_t>* as_TreeChunk(Chunk_t* fc);
// Initialize fields in a TreeChunk that should be
// initialized when the TreeChunk is being added to
// a free list in the tree.
void initialize() { embedded_list()->initialize(); }
- Chunk* next() const { return Chunk::next(); }
- Chunk* prev() const { return Chunk::prev(); }
- size_t size() const volatile { return Chunk::size(); }
+ Chunk_t* next() const { return Chunk_t::next(); }
+ Chunk_t* prev() const { return Chunk_t::prev(); }
+ size_t size() const volatile { return Chunk_t::size(); }
+
+ static size_t min_size() {
+ return _min_tree_chunk_size;
+ }
// debugging
void verify_tree_chunk_list() const;
+ void assert_is_mangled() const;
};
-template <class Chunk>
-class BinaryTreeDictionary: public FreeBlockDictionary<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class BinaryTreeDictionary: public FreeBlockDictionary<Chunk_t> {
friend class VMStructs;
- bool _splay;
- bool _adaptive_freelists;
size_t _total_size;
size_t _total_free_blocks;
- TreeList<Chunk>* _root;
+ TreeList<Chunk_t, FreeList_t>* _root;
// private accessors
- bool splay() const { return _splay; }
- void set_splay(bool v) { _splay = v; }
void set_total_size(size_t v) { _total_size = v; }
virtual void inc_total_size(size_t v);
virtual void dec_total_size(size_t v);
- size_t total_free_blocks() const { return _total_free_blocks; }
void set_total_free_blocks(size_t v) { _total_free_blocks = v; }
- TreeList<Chunk>* root() const { return _root; }
- void set_root(TreeList<Chunk>* v) { _root = v; }
- bool adaptive_freelists() { return _adaptive_freelists; }
+ TreeList<Chunk_t, FreeList_t>* root() const { return _root; }
+ void set_root(TreeList<Chunk_t, FreeList_t>* v) { _root = v; }
// This field is added and can be set to point to the
// the Mutex used to synchronize access to the
@@ -191,54 +194,55 @@ class BinaryTreeDictionary: public FreeBlockDictionary<Chunk> {
// return it. If the chunk
// is the last chunk of that size, remove the node for that size
// from the tree.
- TreeChunk<Chunk>* get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither, bool splay);
- // Return a list of the specified size or NULL from the tree.
- // The list is not removed from the tree.
- TreeList<Chunk>* find_list (size_t size) const;
+ TreeChunk<Chunk_t, FreeList_t>* get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk_t>::Dither dither);
// Remove this chunk from the tree. If the removal results
// in an empty list in the tree, remove the empty list.
- TreeChunk<Chunk>* remove_chunk_from_tree(TreeChunk<Chunk>* tc);
+ TreeChunk<Chunk_t, FreeList_t>* remove_chunk_from_tree(TreeChunk<Chunk_t, FreeList_t>* tc);
// Remove the node in the trees starting at tl that has the
// minimum value and return it. Repair the tree as needed.
- TreeList<Chunk>* remove_tree_minimum(TreeList<Chunk>* tl);
- void semi_splay_step(TreeList<Chunk>* tl);
+ TreeList<Chunk_t, FreeList_t>* remove_tree_minimum(TreeList<Chunk_t, FreeList_t>* tl);
// Add this free chunk to the tree.
- void insert_chunk_in_tree(Chunk* freeChunk);
+ void insert_chunk_in_tree(Chunk_t* freeChunk);
public:
- static const size_t min_tree_chunk_size = sizeof(TreeChunk<Chunk>)/HeapWordSize;
+ // Return a list of the specified size or NULL from the tree.
+ // The list is not removed from the tree.
+ TreeList<Chunk_t, FreeList_t>* find_list (size_t size) const;
void verify_tree() const;
// verify that the given chunk is in the tree.
- bool verify_chunk_in_free_list(Chunk* tc) const;
+ bool verify_chunk_in_free_list(Chunk_t* tc) const;
private:
- void verify_tree_helper(TreeList<Chunk>* tl) const;
- static size_t verify_prev_free_ptrs(TreeList<Chunk>* tl);
+ void verify_tree_helper(TreeList<Chunk_t, FreeList_t>* tl) const;
+ static size_t verify_prev_free_ptrs(TreeList<Chunk_t, FreeList_t>* tl);
// Returns the total number of chunks in the list.
- size_t total_list_length(TreeList<Chunk>* tl) const;
+ size_t total_list_length(TreeList<Chunk_t, FreeList_t>* tl) const;
// Returns the total number of words in the chunks in the tree
// starting at "tl".
- size_t total_size_in_tree(TreeList<Chunk>* tl) const;
+ size_t total_size_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const;
// Returns the sum of the square of the size of each block
// in the tree starting at "tl".
- double sum_of_squared_block_sizes(TreeList<Chunk>* const tl) const;
+ double sum_of_squared_block_sizes(TreeList<Chunk_t, FreeList_t>* const tl) const;
// Returns the total number of free blocks in the tree starting
// at "tl".
- size_t total_free_blocks_in_tree(TreeList<Chunk>* tl) const;
- size_t num_free_blocks() const;
- size_t treeHeight() const;
- size_t tree_height_helper(TreeList<Chunk>* tl) const;
- size_t total_nodes_in_tree(TreeList<Chunk>* tl) const;
- size_t total_nodes_helper(TreeList<Chunk>* tl) const;
+ size_t total_free_blocks_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const;
+ size_t num_free_blocks() const;
+ size_t tree_height() const;
+ size_t tree_height_helper(TreeList<Chunk_t, FreeList_t>* tl) const;
+ size_t total_nodes_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const;
+ size_t total_nodes_helper(TreeList<Chunk_t, FreeList_t>* tl) const;
public:
// Constructor
- BinaryTreeDictionary(bool adaptive_freelists, bool splay = false);
- BinaryTreeDictionary(MemRegion mr, bool adaptive_freelists, bool splay = false);
+ BinaryTreeDictionary() :
+ _total_size(0), _total_free_blocks(0), _root(0) {}
+
+ BinaryTreeDictionary(MemRegion mr);
// Public accessors
size_t total_size() const { return _total_size; }
+ size_t total_free_blocks() const { return _total_free_blocks; }
// Reset the dictionary to the initial conditions with
// a single free chunk.
@@ -249,23 +253,24 @@ class BinaryTreeDictionary: public FreeBlockDictionary<Chunk> {
// Return a chunk of size "size" or greater from
// the tree.
- // want a better dynamic splay strategy for the future.
- Chunk* get_chunk(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither) {
- FreeBlockDictionary<Chunk>::verify_par_locked();
- Chunk* res = get_chunk_from_tree(size, dither, splay());
+ Chunk_t* get_chunk(size_t size, enum FreeBlockDictionary<Chunk_t>::Dither dither) {
+ FreeBlockDictionary<Chunk_t>::verify_par_locked();
+ Chunk_t* res = get_chunk_from_tree(size, dither);
assert(res == NULL || res->is_free(),
"Should be returning a free chunk");
+ assert(dither != FreeBlockDictionary<Chunk_t>::exactly ||
+ res == NULL || res->size() == size, "Not correct size");
return res;
}
- void return_chunk(Chunk* chunk) {
- FreeBlockDictionary<Chunk>::verify_par_locked();
+ void return_chunk(Chunk_t* chunk) {
+ FreeBlockDictionary<Chunk_t>::verify_par_locked();
insert_chunk_in_tree(chunk);
}
- void remove_chunk(Chunk* chunk) {
- FreeBlockDictionary<Chunk>::verify_par_locked();
- remove_chunk_from_tree((TreeChunk<Chunk>*)chunk);
+ void remove_chunk(Chunk_t* chunk) {
+ FreeBlockDictionary<Chunk_t>::verify_par_locked();
+ remove_chunk_from_tree((TreeChunk<Chunk_t, FreeList_t>*)chunk);
assert(chunk->is_free(), "Should still be a free chunk");
}
@@ -281,19 +286,19 @@ class BinaryTreeDictionary: public FreeBlockDictionary<Chunk> {
}
size_t min_size() const {
- return min_tree_chunk_size;
+ return TreeChunk<Chunk_t, FreeList_t>::min_size();
}
double sum_of_squared_block_sizes() const {
return sum_of_squared_block_sizes(root());
}
- Chunk* find_chunk_ends_at(HeapWord* target) const;
+ Chunk_t* find_chunk_ends_at(HeapWord* target) const;
// Find the list with size "size" in the binary tree and update
// the statistics in the list according to "split" (chunk was
// split or coalesce) and "birth" (chunk was added or removed).
- void dict_census_udpate(size_t size, bool split, bool birth);
+ void dict_census_update(size_t size, bool split, bool birth);
// Return true if the dictionary is overpopulated (more chunks of
// this size than desired) for size "size".
bool coal_dict_over_populated(size_t size);
@@ -307,7 +312,7 @@ class BinaryTreeDictionary: public FreeBlockDictionary<Chunk> {
// statistics for the sweep.
void end_sweep_dict_census(double splitSurplusPercent);
// Return the largest free chunk in the tree.
- Chunk* find_largest_dict() const;
+ Chunk_t* find_largest_dict() const;
// Accessors for statistics
void set_tree_surplus(double splitSurplusPercent);
void set_tree_hints(void);
diff --git a/src/share/vm/memory/filemap.cpp b/src/share/vm/memory/filemap.cpp
index d4a11f408..c3d1bf5f9 100644
--- a/src/share/vm/memory/filemap.cpp
+++ b/src/share/vm/memory/filemap.cpp
@@ -29,6 +29,7 @@
#include "runtime/arguments.hpp"
#include "runtime/java.hpp"
#include "runtime/os.hpp"
+#include "services/memTracker.hpp"
#include "utilities/defaultStream.hpp"
# include <sys/stat.h>
@@ -344,25 +345,14 @@ ReservedSpace FileMapInfo::reserve_shared_memory() {
fail_continue(err_msg("Unable to reserved shared space at required address " INTPTR_FORMAT, requested_addr));
return rs;
}
+ // the reserved virtual memory is for mapping class data sharing archive
+ if (MemTracker::is_on()) {
+ MemTracker::record_virtual_memory_type((address)rs.base(), mtClassShared);
+ }
return rs;
}
// Memory map a region in the address space.
-
-char* FileMapInfo::map_region(int i, ReservedSpace rs) {
- struct FileMapInfo::FileMapHeader::space_info* si = &_header._space[i];
- size_t used = si->_used;
- size_t size = align_size_up(used, os::vm_allocation_granularity());
-
- ReservedSpace mapped_rs = rs.first_part(size, true, true);
- ReservedSpace unmapped_rs = rs.last_part(size);
- mapped_rs.release();
-
- return map_region(i);
-}
-
-
-// Memory map a region in the address space.
static const char* shared_region_name[] = { "ReadOnly", "ReadWrite", "MiscData", "MiscCode"};
char* FileMapInfo::map_region(int i) {
diff --git a/src/share/vm/memory/filemap.hpp b/src/share/vm/memory/filemap.hpp
index 760ddfe17..7cdd9616c 100644
--- a/src/share/vm/memory/filemap.hpp
+++ b/src/share/vm/memory/filemap.hpp
@@ -125,7 +125,6 @@ public:
size_t capacity, bool read_only, bool allow_exec);
void write_bytes(const void* buffer, int count);
void write_bytes_aligned(const void* buffer, int count);
- char* map_region(int i, ReservedSpace rs);
char* map_region(int i);
void unmap_region(int i);
void close();
diff --git a/src/share/vm/memory/freeBlockDictionary.cpp b/src/share/vm/memory/freeBlockDictionary.cpp
index 13b4daa89..9b2221281 100644
--- a/src/share/vm/memory/freeBlockDictionary.cpp
+++ b/src/share/vm/memory/freeBlockDictionary.cpp
@@ -27,6 +27,8 @@
#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
#endif // SERIALGC
#include "memory/freeBlockDictionary.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
#ifdef TARGET_OS_FAMILY_linux
# include "thread_linux.inline.hpp"
#endif
@@ -62,6 +64,9 @@ template <class Chunk> void FreeBlockDictionary<Chunk>::verify_par_locked() cons
}
#endif
+template class FreeBlockDictionary<Metablock>;
+template class FreeBlockDictionary<Metachunk>;
+
#ifndef SERIALGC
// Explicitly instantiate for FreeChunk
template class FreeBlockDictionary<FreeChunk>;
diff --git a/src/share/vm/memory/freeBlockDictionary.hpp b/src/share/vm/memory/freeBlockDictionary.hpp
index 573cfed3a..2502e362d 100644
--- a/src/share/vm/memory/freeBlockDictionary.hpp
+++ b/src/share/vm/memory/freeBlockDictionary.hpp
@@ -66,7 +66,7 @@ class FreeBlockDictionary: public CHeapObj<mtGC> {
virtual void reset(HeapWord* addr, size_t size) = 0;
virtual void reset() = 0;
- virtual void dict_census_udpate(size_t size, bool split, bool birth) = 0;
+ virtual void dict_census_update(size_t size, bool split, bool birth) = 0;
virtual bool coal_dict_over_populated(size_t size) = 0;
virtual void begin_sweep_dict_census(double coalSurplusPercent,
float inter_sweep_current, float inter_sweep_estimate,
diff --git a/src/share/vm/memory/freeList.cpp b/src/share/vm/memory/freeList.cpp
index a5fbc06ee..f5cd80545 100644
--- a/src/share/vm/memory/freeList.cpp
+++ b/src/share/vm/memory/freeList.cpp
@@ -25,6 +25,8 @@
#include "precompiled.hpp"
#include "memory/freeBlockDictionary.hpp"
#include "memory/freeList.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
#include "memory/sharedHeap.hpp"
#include "runtime/globals.hpp"
#include "runtime/mutex.hpp"
@@ -49,8 +51,6 @@ FreeList<Chunk>::FreeList() :
{
_size = 0;
_count = 0;
- _hint = 0;
- init_statistics();
}
template <class Chunk>
@@ -62,34 +62,50 @@ FreeList<Chunk>::FreeList(Chunk* fc) :
{
_size = fc->size();
_count = 1;
- _hint = 0;
- init_statistics();
-#ifndef PRODUCT
- _allocation_stats.set_returned_bytes(size() * HeapWordSize);
-#endif
}
template <class Chunk>
-void FreeList<Chunk>::reset(size_t hint) {
+void FreeList<Chunk>::link_head(Chunk* v) {
+ assert_proper_lock_protection();
+ set_head(v);
+ // If this method is not used (just set the head instead),
+ // this check can be avoided.
+ if (v != NULL) {
+ v->link_prev(NULL);
+ }
+}
+
+
+
+template <class Chunk>
+void FreeList<Chunk>::reset() {
+ // Don't set the _size to 0 because this method is
+ // used with an existing list that has a size but which has
+ // been emptied.
+ // Don't clear the _protecting_lock of an existing list.
set_count(0);
set_head(NULL);
set_tail(NULL);
- set_hint(hint);
}
template <class Chunk>
-void FreeList<Chunk>::init_statistics(bool split_birth) {
- _allocation_stats.initialize(split_birth);
+void FreeList<Chunk>::initialize() {
+#ifdef ASSERT
+ // Needed early because it might be checked in other initializing code.
+ set_protecting_lock(NULL);
+#endif
+ reset();
+ set_size(0);
}
-template <class Chunk>
-Chunk* FreeList<Chunk>::get_chunk_at_head() {
+template <class Chunk_t>
+Chunk_t* FreeList<Chunk_t>::get_chunk_at_head() {
assert_proper_lock_protection();
assert(head() == NULL || head()->prev() == NULL, "list invariant");
assert(tail() == NULL || tail()->next() == NULL, "list invariant");
- Chunk* fc = head();
+ Chunk_t* fc = head();
if (fc != NULL) {
- Chunk* nextFC = fc->next();
+ Chunk_t* nextFC = fc->next();
if (nextFC != NULL) {
// The chunk fc being removed has a "next". Set the "next" to the
// "prev" of fc.
@@ -197,11 +213,6 @@ void FreeList<Chunk>::return_chunk_at_head(Chunk* chunk, bool record_return) {
link_tail(chunk);
}
increment_count(); // of # of chunks in list
- DEBUG_ONLY(
- if (record_return) {
- increment_returned_bytes_by(size()*HeapWordSize);
- }
- )
assert(head() == NULL || head()->prev() == NULL, "list invariant");
assert(tail() == NULL || tail()->next() == NULL, "list invariant");
assert(head() == NULL || head()->size() == size(), "wrong item on list");
@@ -233,11 +244,6 @@ void FreeList<Chunk>::return_chunk_at_tail(Chunk* chunk, bool record_return) {
}
link_tail(chunk);
increment_count(); // of # of chunks in list
- DEBUG_ONLY(
- if (record_return) {
- increment_returned_bytes_by(size()*HeapWordSize);
- }
- )
assert(head() == NULL || head()->prev() == NULL, "list invariant");
assert(tail() == NULL || tail()->next() == NULL, "list invariant");
assert(head() == NULL || head()->size() == size(), "wrong item on list");
@@ -273,7 +279,7 @@ void FreeList<Chunk>::prepend(FreeList<Chunk>* fl) {
}
}
-// verify_chunk_in_free_list() is used to verify that an item is in this free list.
+// verify_chunk_in_free_list() is used to verify that an item is in this free list.
// It is used as a debugging aid.
template <class Chunk>
bool FreeList<Chunk>::verify_chunk_in_free_list(Chunk* fc) const {
@@ -294,40 +300,14 @@ bool FreeList<Chunk>::verify_chunk_in_free_list(Chunk* fc) const {
#ifndef PRODUCT
template <class Chunk>
-void FreeList<Chunk>::verify_stats() const {
- // The +1 of the LH comparand is to allow some "looseness" in
- // checking: we usually call this interface when adding a block
- // and we'll subsequently update the stats; we cannot update the
- // stats beforehand because in the case of the large-block BT
- // dictionary for example, this might be the first block and
- // in that case there would be no place that we could record
- // the stats (which are kept in the block itself).
- assert((_allocation_stats.prev_sweep() + _allocation_stats.split_births()
- + _allocation_stats.coal_births() + 1) // Total Production Stock + 1
- >= (_allocation_stats.split_deaths() + _allocation_stats.coal_deaths()
- + (ssize_t)count()), // Total Current Stock + depletion
- err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT
- " violates Conservation Principle: "
- "prev_sweep(" SIZE_FORMAT ")"
- " + split_births(" SIZE_FORMAT ")"
- " + coal_births(" SIZE_FORMAT ") + 1 >= "
- " split_deaths(" SIZE_FORMAT ")"
- " coal_deaths(" SIZE_FORMAT ")"
- " + count(" SSIZE_FORMAT ")",
- this, _size, _allocation_stats.prev_sweep(), _allocation_stats.split_births(),
- _allocation_stats.split_births(), _allocation_stats.split_deaths(),
- _allocation_stats.coal_deaths(), count()));
-}
-
-template <class Chunk>
void FreeList<Chunk>::assert_proper_lock_protection_work() const {
- assert(_protecting_lock != NULL, "Don't call this directly");
+ assert(protecting_lock() != NULL, "Don't call this directly");
assert(ParallelGCThreads > 0, "Don't call this directly");
Thread* thr = Thread::current();
if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
// assert that we are holding the freelist lock
} else if (thr->is_GC_task_thread()) {
- assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+ assert(protecting_lock()->owned_by_self(), "FreeList RACE DETECTED");
} else if (thr->is_Java_thread()) {
assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
} else {
@@ -350,21 +330,17 @@ void FreeList<Chunk>::print_labels_on(outputStream* st, const char* c) {
// to the call is a non-null string, it is printed in the first column;
// otherwise, if the argument is null (the default), then the size of the
// (free list) block is printed in the first column.
-template <class Chunk>
-void FreeList<Chunk>::print_on(outputStream* st, const char* c) const {
+template <class Chunk_t>
+void FreeList<Chunk_t>::print_on(outputStream* st, const char* c) const {
if (c != NULL) {
st->print("%16s", c);
} else {
st->print(SIZE_FORMAT_W(16), size());
}
- st->print("\t"
- SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t"
- SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n",
- bfr_surp(), surplus(), desired(), prev_sweep(), before_sweep(),
- count(), coal_births(), coal_deaths(), split_births(), split_deaths());
}
+template class FreeList<Metablock>;
+template class FreeList<Metachunk>;
#ifndef SERIALGC
-// Needs to be after the definitions have been seen.
template class FreeList<FreeChunk>;
#endif // SERIALGC
diff --git a/src/share/vm/memory/freeList.hpp b/src/share/vm/memory/freeList.hpp
index a982cfbda..37438cc38 100644
--- a/src/share/vm/memory/freeList.hpp
+++ b/src/share/vm/memory/freeList.hpp
@@ -40,23 +40,19 @@ class CompactibleFreeListSpace;
// for that implementation.
class Mutex;
-template <class Chunk> class TreeList;
-template <class Chunk> class PrintTreeCensusClosure;
-template <class Chunk>
+template <class Chunk_t>
class FreeList VALUE_OBJ_CLASS_SPEC {
friend class CompactibleFreeListSpace;
friend class VMStructs;
- friend class PrintTreeCensusClosure<Chunk>;
private:
- Chunk* _head; // Head of list of free chunks
- Chunk* _tail; // Tail of list of free chunks
+ Chunk_t* _head; // Head of list of free chunks
+ Chunk_t* _tail; // Tail of list of free chunks
size_t _size; // Size in Heap words of each chunk
ssize_t _count; // Number of entries in list
- size_t _hint; // next larger size list with a positive surplus
- AllocationStats _allocation_stats; // allocation-related statistics
+ protected:
#ifdef ASSERT
Mutex* _protecting_lock;
@@ -71,10 +67,6 @@ class FreeList VALUE_OBJ_CLASS_SPEC {
#endif
}
- // Initialize the allocation statistics.
- protected:
- void init_statistics(bool split_birth = false);
- void set_count(ssize_t v) { _count = v;}
void increment_count() {
_count++;
}
@@ -89,52 +81,48 @@ class FreeList VALUE_OBJ_CLASS_SPEC {
// Construct a list without any entries.
FreeList();
// Construct a list with "fc" as the first (and lone) entry in the list.
- FreeList(Chunk* fc);
+ FreeList(Chunk_t* fc);
- // Reset the head, tail, hint, and count of a free list.
- void reset(size_t hint);
+ // Do initialization
+ void initialize();
+
+ // Reset the head, tail, and count of a free list.
+ void reset();
// Declare the current free list to be protected by the given lock.
#ifdef ASSERT
- void set_protecting_lock(Mutex* protecting_lock) {
- _protecting_lock = protecting_lock;
+ Mutex* protecting_lock() const { return _protecting_lock; }
+ void set_protecting_lock(Mutex* v) {
+ _protecting_lock = v;
}
#endif
// Accessors.
- Chunk* head() const {
+ Chunk_t* head() const {
assert_proper_lock_protection();
return _head;
}
- void set_head(Chunk* v) {
+ void set_head(Chunk_t* v) {
assert_proper_lock_protection();
_head = v;
assert(!_head || _head->size() == _size, "bad chunk size");
}
// Set the head of the list and set the prev field of non-null
// values to NULL.
- void link_head(Chunk* v) {
- assert_proper_lock_protection();
- set_head(v);
- // If this method is not used (just set the head instead),
- // this check can be avoided.
- if (v != NULL) {
- v->link_prev(NULL);
- }
- }
+ void link_head(Chunk_t* v);
- Chunk* tail() const {
+ Chunk_t* tail() const {
assert_proper_lock_protection();
return _tail;
}
- void set_tail(Chunk* v) {
+ void set_tail(Chunk_t* v) {
assert_proper_lock_protection();
_tail = v;
assert(!_tail || _tail->size() == _size, "bad chunk size");
}
// Set the tail of the list and set the next field of non-null
// values to NULL.
- void link_tail(Chunk* v) {
+ void link_tail(Chunk_t* v) {
assert_proper_lock_protection();
set_tail(v);
if (v != NULL) {
@@ -152,174 +140,45 @@ class FreeList VALUE_OBJ_CLASS_SPEC {
assert_proper_lock_protection();
_size = v;
}
- ssize_t count() const {
- return _count;
- }
- size_t hint() const {
- return _hint;
- }
- void set_hint(size_t v) {
- assert_proper_lock_protection();
- assert(v == 0 || _size < v, "Bad hint"); _hint = v;
- }
-
- // Accessors for statistics
- AllocationStats* allocation_stats() {
- assert_proper_lock_protection();
- return &_allocation_stats;
- }
-
- ssize_t desired() const {
- return _allocation_stats.desired();
- }
- void set_desired(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_desired(v);
- }
- void compute_desired(float inter_sweep_current,
- float inter_sweep_estimate,
- float intra_sweep_estimate) {
- assert_proper_lock_protection();
- _allocation_stats.compute_desired(_count,
- inter_sweep_current,
- inter_sweep_estimate,
- intra_sweep_estimate);
- }
- ssize_t coal_desired() const {
- return _allocation_stats.coal_desired();
- }
- void set_coal_desired(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_coal_desired(v);
- }
-
- ssize_t surplus() const {
- return _allocation_stats.surplus();
- }
- void set_surplus(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_surplus(v);
- }
- void increment_surplus() {
- assert_proper_lock_protection();
- _allocation_stats.increment_surplus();
- }
- void decrement_surplus() {
- assert_proper_lock_protection();
- _allocation_stats.decrement_surplus();
- }
-
- ssize_t bfr_surp() const {
- return _allocation_stats.bfr_surp();
- }
- void set_bfr_surp(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_bfr_surp(v);
- }
- ssize_t prev_sweep() const {
- return _allocation_stats.prev_sweep();
- }
- void set_prev_sweep(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_prev_sweep(v);
- }
- ssize_t before_sweep() const {
- return _allocation_stats.before_sweep();
- }
- void set_before_sweep(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_before_sweep(v);
- }
-
- ssize_t coal_births() const {
- return _allocation_stats.coal_births();
- }
- void set_coal_births(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_coal_births(v);
- }
- void increment_coal_births() {
- assert_proper_lock_protection();
- _allocation_stats.increment_coal_births();
- }
-
- ssize_t coal_deaths() const {
- return _allocation_stats.coal_deaths();
- }
- void set_coal_deaths(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_coal_deaths(v);
- }
- void increment_coal_deaths() {
- assert_proper_lock_protection();
- _allocation_stats.increment_coal_deaths();
- }
-
- ssize_t split_births() const {
- return _allocation_stats.split_births();
- }
- void set_split_births(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_split_births(v);
- }
- void increment_split_births() {
- assert_proper_lock_protection();
- _allocation_stats.increment_split_births();
- }
+ ssize_t count() const { return _count; }
+ void set_count(ssize_t v) { _count = v;}
- ssize_t split_deaths() const {
- return _allocation_stats.split_deaths();
- }
- void set_split_deaths(ssize_t v) {
- assert_proper_lock_protection();
- _allocation_stats.set_split_deaths(v);
- }
- void increment_split_deaths() {
- assert_proper_lock_protection();
- _allocation_stats.increment_split_deaths();
- }
+ size_t get_better_size() { return size(); }
- NOT_PRODUCT(
- // For debugging. The "_returned_bytes" in all the lists are summed
- // and compared with the total number of bytes swept during a
- // collection.
- size_t returned_bytes() const { return _allocation_stats.returned_bytes(); }
- void set_returned_bytes(size_t v) { _allocation_stats.set_returned_bytes(v); }
- void increment_returned_bytes_by(size_t v) {
- _allocation_stats.set_returned_bytes(_allocation_stats.returned_bytes() + v);
- }
- )
+ size_t returned_bytes() const { ShouldNotReachHere(); return 0; }
+ void set_returned_bytes(size_t v) {}
+ void increment_returned_bytes_by(size_t v) {}
// Unlink head of list and return it. Returns NULL if
// the list is empty.
- Chunk* get_chunk_at_head();
+ Chunk_t* get_chunk_at_head();
// Remove the first "n" or "count", whichever is smaller, chunks from the
// list, setting "fl", which is required to be empty, to point to them.
- void getFirstNChunksFromList(size_t n, FreeList<Chunk>* fl);
+ void getFirstNChunksFromList(size_t n, FreeList<Chunk_t>* fl);
// Unlink this chunk from it's free list
- void remove_chunk(Chunk* fc);
+ void remove_chunk(Chunk_t* fc);
// Add this chunk to this free list.
- void return_chunk_at_head(Chunk* fc);
- void return_chunk_at_tail(Chunk* fc);
+ void return_chunk_at_head(Chunk_t* fc);
+ void return_chunk_at_tail(Chunk_t* fc);
// Similar to returnChunk* but also records some diagnostic
// information.
- void return_chunk_at_head(Chunk* fc, bool record_return);
- void return_chunk_at_tail(Chunk* fc, bool record_return);
+ void return_chunk_at_head(Chunk_t* fc, bool record_return);
+ void return_chunk_at_tail(Chunk_t* fc, bool record_return);
// Prepend "fl" (whose size is required to be the same as that of "this")
// to the front of "this" list.
- void prepend(FreeList<Chunk>* fl);
+ void prepend(FreeList<Chunk_t>* fl);
// Verify that the chunk is in the list.
// found. Return NULL if "fc" is not found.
- bool verify_chunk_in_free_list(Chunk* fc) const;
+ bool verify_chunk_in_free_list(Chunk_t* fc) const;
// Stats verification
- void verify_stats() const PRODUCT_RETURN;
+// void verify_stats() const { ShouldNotReachHere(); };
// Printing support
static void print_labels_on(outputStream* st, const char* c);
diff --git a/src/share/vm/memory/metablock.hpp b/src/share/vm/memory/metablock.hpp
new file mode 100644
index 000000000..220d36148
--- /dev/null
+++ b/src/share/vm/memory/metablock.hpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#ifndef SHARE_VM_MEMORY_METABLOCK_HPP
+#define SHARE_VM_MEMORY_METABLOCK_HPP
+
+// A Metablock is the unit of allocation from a Chunk. It is initialized
+// with the size of the requested allocation. That size is overwritten
+// once the allocation returns.
+//
+// A Metablock may be reused by its SpaceManager but is never moved between
+// SpaceManagers. There is no explicit link to the Metachunk
+// from which it was allocated. A Metablock may be deallocated and
+// put on a freelist but the space is never freed; rather,
+// the Metachunk it is a part of will be deallocated when its
+// associated class loader is collected.
+
+class Metablock VALUE_OBJ_CLASS_SPEC {
+ friend class VMStructs;
+ private:
+ // Used to align the allocation (see below).
+ union block_t {
+ void* _data[3];
+ struct header_t {
+ size_t _word_size;
+ Metablock* _next;
+ Metablock* _prev;
+ } _header;
+ } _block;
+ static size_t _min_block_byte_size;
+ static size_t _overhead;
+
+ typedef union block_t Block;
+ typedef struct header_t Header;
+ const Block* block() const { return &_block; }
+ const Block::header_t* header() const { return &(block()->_header); }
+ public:
+
+ static Metablock* initialize(MetaWord* p, size_t word_size);
+
+ // This places the body of the block at a 2 word boundary
+ // because every block starts on a 2 word boundary. Work out
+ // how to make the body on a 2 word boundary if the block
+ // starts on an arbitrary boundary. JJJ
+
+ size_t word_size() const { return header()->_word_size; }
+ void set_word_size(size_t v) { _block._header._word_size = v; }
+ size_t size() const volatile { return _block._header._word_size; }
+ void set_size(size_t v) { _block._header._word_size = v; }
+ Metablock* next() const { return header()->_next; }
+ void set_next(Metablock* v) { _block._header._next = v; }
+ Metablock* prev() const { return header()->_prev; }
+ void set_prev(Metablock* v) { _block._header._prev = v; }
+
+ static size_t min_block_byte_size() { return _min_block_byte_size; }
+ static size_t overhead() { return _overhead; }
+
+ bool is_free() { return header()->_word_size != 0; }
+ void clear_next() { set_next(NULL); }
+ void link_prev(Metablock* ptr) { set_prev(ptr); }
+ uintptr_t* end() { return ((uintptr_t*) this) + size(); }
+ bool cantCoalesce() const { return false; }
+ void link_next(Metablock* ptr) { set_next(ptr); }
+ void link_after(Metablock* ptr){
+ link_next(ptr);
+ if (ptr != NULL) ptr->link_prev(this);
+ }
+
+ // Should not be needed in a free list of Metablocks
+ void markNotFree() { ShouldNotReachHere(); }
+
+ // Debug support
+#ifdef ASSERT
+ void* prev_addr() const { return (void*)&_block._header._prev; }
+ void* next_addr() const { return (void*)&_block._header._next; }
+ void* size_addr() const { return (void*)&_block._header._word_size; }
+#endif
+ bool verify_chunk_in_free_list(Metablock* tc) const { return true; }
+ bool verify_par_locked() { return true; }
+
+ void assert_is_mangled() const {/* Don't check "\*/}
+};
+#endif // SHARE_VM_MEMORY_METABLOCK_HPP
diff --git a/src/share/vm/memory/metachunk.hpp b/src/share/vm/memory/metachunk.hpp
new file mode 100644
index 000000000..dd461972a
--- /dev/null
+++ b/src/share/vm/memory/metachunk.hpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#ifndef SHARE_VM_MEMORY_METACHUNK_HPP
+#define SHARE_VM_MEMORY_METACHUNK_HPP
+
+// Metachunk - Quantum of allocation from a Virtualspace
+// Metachunks are reused (when freed are put on a global freelist) and
+// have no permanent association to a SpaceManager.
+
+// +--------------+ <- end
+// | | --+ ---+
+// | | | free |
+// | | | |
+// | | | | capacity
+// | | | |
+// | | <- top --+ |
+// | | ---+ |
+// | | | used |
+// | | | |
+// | | | |
+// +--------------+ <- bottom ---+ ---+
+
+class Metachunk VALUE_OBJ_CLASS_SPEC {
+  // Links to support (doubly linked) lists of chunks.
+  Metachunk* _next;
+  Metachunk* _prev;
+
+  // Extent of the chunk and the current allocation pointer; see the
+  // bottom/top/end diagram above the class.
+  MetaWord* _bottom;
+  MetaWord* _end;
+  MetaWord* _top;
+  size_t _word_size;
+  // Used in a guarantee() so included in the Product builds
+  // even though it is only for debugging.
+  bool _is_free;
+
+  // Metachunks are allocated out of a MetadataVirtualSpace and
+  // use some of its space to describe themselves (plus alignment
+  // considerations). Metadata is allocated in the rest of the chunk.
+  // This size is the overhead of maintaining the Metachunk within
+  // the space.
+  static size_t _overhead;
+
+  void set_bottom(MetaWord* v) { _bottom = v; }
+  void set_end(MetaWord* v) { _end = v; }
+  void set_top(MetaWord* v) { _top = v; }
+  void set_word_size(size_t v) { _word_size = v; }
+ public:
+  // Give every field a defined value in all build flavors.  _is_free is
+  // documented above as being read by a guarantee() in product builds, so
+  // it must not be initialized under ASSERT only; the links and the size
+  // are cleared as well so a default-constructed chunk is never read with
+  // indeterminate members.  Initializers follow declaration order.
+  Metachunk() : _next(NULL), _prev(NULL), _bottom(NULL), _end(NULL),
+                _top(NULL), _word_size(0), _is_free(false) {}
+
+  // Used to add a Metachunk to a list of Metachunks.
+  // A chunk must never be linked to itself.
+  void set_next(Metachunk* v) { _next = v; assert(v != this, "Boom");}
+  void set_prev(Metachunk* v) { _prev = v; assert(v != this, "Boom");}
+
+  MetaWord* allocate(size_t word_size);
+  static Metachunk* initialize(MetaWord* ptr, size_t word_size);
+
+  // Accessors
+  Metachunk* next() const { return _next; }
+  Metachunk* prev() const { return _prev; }
+  MetaWord* bottom() const { return _bottom; }
+  MetaWord* end() const { return _end; }
+  MetaWord* top() const { return _top; }
+  size_t word_size() const { return _word_size; }
+  // size()/set_size() read and write the same _word_size field as
+  // word_size()/set_word_size().
+  size_t size() const volatile { return _word_size; }
+  void set_size(size_t v) { _word_size = v; }
+  bool is_free() { return _is_free; }
+  void set_is_free(bool v) { _is_free = v; }
+  static size_t overhead() { return _overhead; }
+  void clear_next() { set_next(NULL); }
+  void link_prev(Metachunk* ptr) { set_prev(ptr); }
+  // Non-const overload of end(): computed from the address of this header
+  // plus size() words rather than read from _end.
+  // NOTE(review): presumably equal to _end when the header sits at
+  // _bottom -- confirm against Metachunk::initialize().
+  uintptr_t* end() { return ((uintptr_t*) this) + size(); }
+  bool cantCoalesce() const { return false; }
+  void link_next(Metachunk* ptr) { set_next(ptr); }
+  // Make ptr the successor of this chunk and, when ptr is not NULL,
+  // make this chunk the predecessor of ptr.
+  void link_after(Metachunk* ptr){
+    link_next(ptr);
+    if (ptr != NULL) ptr->link_prev(this);
+  }
+
+  // Reset top to just past the chunk header so the chunk can be reused.
+  void reset_empty() { _top = (_bottom + _overhead); }
+  bool is_empty() { return _top == (_bottom + _overhead); }
+
+  // used (has been allocated)
+  // free (available for future allocations)
+  // capacity (total size of chunk)
+  size_t used_word_size();
+  size_t free_word_size();
+  size_t capacity_word_size();
+
+  // Debug support
+#ifdef ASSERT
+  void* prev_addr() const { return (void*)&_prev; }
+  void* next_addr() const { return (void*)&_next; }
+  void* size_addr() const { return (void*)&_word_size; }
+#endif
+  // Verification hooks that always report success for Metachunks.
+  bool verify_chunk_in_free_list(Metachunk* tc) const { return true; }
+  bool verify_par_locked() { return true; }
+
+  void assert_is_mangled() const {/* Don't check */}
+
+#ifdef ASSERT
+  void mangle();
+#endif // ASSERT
+
+  void print_on(outputStream* st) const;
+  void verify();
+};
+#endif // SHARE_VM_MEMORY_METACHUNK_HPP
diff --git a/src/share/vm/memory/metaspace.cpp b/src/share/vm/memory/metaspace.cpp
index 0396eef2d..fc2609f2a 100644
--- a/src/share/vm/memory/metaspace.cpp
+++ b/src/share/vm/memory/metaspace.cpp
@@ -24,9 +24,12 @@
#include "precompiled.hpp"
#include "gc_interface/collectedHeap.hpp"
#include "memory/binaryTreeDictionary.hpp"
+#include "memory/freeList.hpp"
#include "memory/collectorPolicy.hpp"
#include "memory/filemap.hpp"
#include "memory/freeList.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
#include "memory/metaspace.hpp"
#include "memory/metaspaceShared.hpp"
#include "memory/resourceArea.hpp"
@@ -37,15 +40,8 @@
#include "utilities/copy.hpp"
#include "utilities/debug.hpp"
-// Define this macro to deallocate Metablock. If not defined,
-// blocks are not yet deallocated and are only mangled.
-#undef DEALLOCATE_BLOCKS
-
-// Easily recognizable patterns
-// These patterns can be the same in 32bit or 64bit since
-// they only have to be easily recognizable.
-const void* metaspace_allocation_leader = (void*) 0X11111111;
-const void* metaspace_allocation_trailer = (void*) 0X77777777;
+typedef BinaryTreeDictionary<Metablock, FreeList> BlockTreeDictionary;
+typedef BinaryTreeDictionary<Metachunk, FreeList> ChunkTreeDictionary;
// Parameters for stress mode testing
const uint metadata_deallocate_a_lot_block = 10;
@@ -53,7 +49,6 @@ const uint metadata_deallocate_a_lock_chunk = 3;
size_t const allocation_from_dictionary_limit = 64 * K;
const size_t metadata_chunk_initialize = 0xf7f7f7f7;
const size_t metadata_deallocate = 0xf5f5f5f5;
-const size_t metadata_space_manager_allocate = 0xf3f3f3f3;
MetaWord* last_allocated = 0;
@@ -62,11 +57,12 @@ enum ChunkIndex {
SmallIndex = 0,
MediumIndex = 1,
HumongousIndex = 2,
- NumberOfFreeLists = 3
+ NumberOfFreeLists = 2,
+ NumberOfInUseLists = 3
};
static ChunkIndex next_chunk_index(ChunkIndex i) {
- assert(i < NumberOfFreeLists, "Out of bound");
+ assert(i < NumberOfInUseLists, "Out of bound");
return (ChunkIndex) (i+1);
}
@@ -100,164 +96,13 @@ bool MetaspaceGC::_should_concurrent_collect = false;
// the Chunk after the header for the Chunk) where as Metachunks
// point to space in a VirtualSpace. To replace Metachunks with
// Chunks, change Chunks so that they can be allocated out of a VirtualSpace.
-//
-
-// Metablock are the unit of allocation from a Chunk. It contains
-// the size of the requested allocation in a debug build.
-// Also in a debug build it has a marker before and after the
-// body of the block. The address of the body is the address returned
-// by the allocation.
-//
-// Layout in a debug build. In a product build only the body is present.
-//
-// +-----------+-----------+------------+ +-----------+
-// | word size | leader | body | ... | trailer |
-// +-----------+-----------+------------+ +-----------+
-//
-// A Metablock may be reused by its SpaceManager but are never moved between
-// SpaceManagers. There is no explicit link to the Metachunk
-// from which it was allocated. Metablock are not deallocated, rather
-// the Metachunk it is a part of will be deallocated when it's
-// associated class loader is collected.
-//
-// When the word size of a block is passed in to the deallocation
-// call the word size no longer needs to be part of a Metablock.
-
-class Metablock {
- friend class VMStructs;
- private:
- // Used to align the allocation (see below) and for debugging.
-#ifdef ASSERT
- struct {
- size_t _word_size;
- void* _leader;
- } _header;
- void* _data[1];
-#endif
- static size_t _overhead;
-
-#ifdef ASSERT
- void set_word_size(size_t v) { _header._word_size = v; }
- void* leader() { return _header._leader; }
- void* trailer() {
- jlong index = (jlong) _header._word_size - sizeof(_header)/BytesPerWord - 1;
- assert(index > 0, err_msg("Bad indexling of trailer %d", index));
- void** ptr = &_data[index];
- return *ptr;
- }
- void set_leader(void* v) { _header._leader = v; }
- void set_trailer(void* v) {
- void** ptr = &_data[_header._word_size - sizeof(_header)/BytesPerWord - 1];
- *ptr = v;
- }
- public:
- size_t word_size() { return _header._word_size; }
-#endif
- public:
-
- static Metablock* initialize(MetaWord* p, size_t word_size);
-
- // This places the body of the block at a 2 word boundary
- // because every block starts on a 2 word boundary. Work out
- // how to make the body on a 2 word boundary if the block
- // starts on a arbitrary boundary. JJJ
-
-#ifdef ASSERT
- MetaWord* data() { return (MetaWord*) &_data[0]; }
-#else
- MetaWord* data() { return (MetaWord*) this; }
-#endif
- static Metablock* metablock_from_data(MetaWord* p) {
+size_t Metablock::_min_block_byte_size = sizeof(Metablock);
#ifdef ASSERT
- size_t word_offset = offset_of(Metablock, _data)/BytesPerWord;
- Metablock* result = (Metablock*) (p - word_offset);
- return result;
+ size_t Metablock::_overhead =
+ Chunk::aligned_overhead_size(sizeof(Metablock)) / BytesPerWord;
#else
- return (Metablock*) p;
+ size_t Metablock::_overhead = 0;
#endif
- }
-
- static size_t overhead() { return _overhead; }
- void verify();
-};
-
-// Metachunk - Quantum of allocation from a Virtualspace
-// Metachunks are reused (when freed are put on a global freelist) and
-// have no permanent association to a SpaceManager.
-
-// +--------------+ <- end
-// | | --+ ---+
-// | | | free |
-// | | | |
-// | | | | capacity
-// | | | |
-// | | <- top --+ |
-// | | ---+ |
-// | | | used |
-// | | | |
-// | | | |
-// +--------------+ <- bottom ---+ ---+
-
-class Metachunk VALUE_OBJ_CLASS_SPEC {
- // link to support lists of chunks
- Metachunk* _next;
-
- MetaWord* _bottom;
- MetaWord* _end;
- MetaWord* _top;
- size_t _word_size;
-
- // Metachunks are allocated out of a MetadataVirtualSpace and
- // and use some of its space to describe itself (plus alignment
- // considerations). Metadata is allocated in the rest of the chunk.
- // This size is the overhead of maintaining the Metachunk within
- // the space.
- static size_t _overhead;
-
- void set_bottom(MetaWord* v) { _bottom = v; }
- void set_end(MetaWord* v) { _end = v; }
- void set_top(MetaWord* v) { _top = v; }
- void set_word_size(size_t v) { _word_size = v; }
- public:
-
- // Used to add a Metachunk to a list of Metachunks
- void set_next(Metachunk* v) { _next = v; assert(v != this, "Boom");}
-
- Metablock* allocate(size_t word_size);
- static Metachunk* initialize(MetaWord* ptr, size_t word_size);
-
- // Accessors
- Metachunk* next() const { return _next; }
- MetaWord* bottom() const { return _bottom; }
- MetaWord* end() const { return _end; }
- MetaWord* top() const { return _top; }
- size_t word_size() const { return _word_size; }
- static size_t overhead() { return _overhead; }
-
- // Reset top to bottom so chunk can be reused.
- void reset_empty() { _top = (_bottom + _overhead); }
- bool is_empty() { return _top == (_bottom + _overhead); }
-
- // used (has been allocated)
- // free (available for future allocations)
- // capacity (total size of chunk)
- size_t used_word_size();
- size_t free_word_size();
- size_t capacity_word_size();
-
-#ifdef ASSERT
- void mangle() {
- // Mangle the payload of the chunk and not the links that
- // maintain list of chunks.
- HeapWord* start = (HeapWord*)(bottom() + overhead());
- size_t word_size = capacity_word_size() - overhead();
- Copy::fill_to_words(start, word_size, metadata_chunk_initialize);
- }
-#endif // ASSERT
-
- void print_on(outputStream* st) const;
- void verify();
-};
// Pointer to list of Metachunks.
@@ -292,7 +137,10 @@ class ChunkManager VALUE_OBJ_CLASS_SPEC {
// SmallChunk
// MediumChunk
// HumongousChunk
- ChunkList _free_chunks[3];
+ ChunkList _free_chunks[NumberOfFreeLists];
+
+ // HumongousChunk
+ ChunkTreeDictionary _humongous_dictionary;
// ChunkManager in all lists of this type
size_t _free_chunks_total;
@@ -337,7 +185,9 @@ class ChunkManager VALUE_OBJ_CLASS_SPEC {
}
ChunkList* free_medium_chunks() { return &_free_chunks[1]; }
ChunkList* free_small_chunks() { return &_free_chunks[0]; }
- ChunkList* free_humongous_chunks() { return &_free_chunks[2]; }
+ ChunkTreeDictionary* humongous_dictionary() {
+ return &_humongous_dictionary;
+ }
ChunkList* free_chunks(ChunkIndex index);
@@ -356,41 +206,35 @@ class ChunkManager VALUE_OBJ_CLASS_SPEC {
void locked_print_free_chunks(outputStream* st);
void locked_print_sum_free_chunks(outputStream* st);
+
+ void print_on(outputStream* st);
};
// Used to manage the free list of Metablocks (a block corresponds
// to the allocation of a quantum of metadata).
class BlockFreelist VALUE_OBJ_CLASS_SPEC {
-#ifdef DEALLOCATE_BLOCKS
- BinaryTreeDictionary<Metablock>* _dictionary;
-#endif
- static Metablock* initialize_free_chunk(Metablock* block, size_t word_size);
+ BlockTreeDictionary* _dictionary;
+ static Metablock* initialize_free_chunk(MetaWord* p, size_t word_size);
-#ifdef DEALLOCATE_BLOCKS
// Accessors
- BinaryTreeDictionary<Metablock>* dictionary() const { return _dictionary; }
-#endif
+ BlockTreeDictionary* dictionary() const { return _dictionary; }
public:
BlockFreelist();
~BlockFreelist();
// Get and return a block to the free list
- Metablock* get_block(size_t word_size);
- void return_block(Metablock* block, size_t word_size);
+ MetaWord* get_block(size_t word_size);
+ void return_block(MetaWord* p, size_t word_size);
- size_t totalSize() {
-#ifdef DEALLOCATE_BLOCKS
- if (dictionary() == NULL) {
- return 0;
- } else {
- return dictionary()->totalSize();
- }
-#else
+ size_t total_size() {
+ if (dictionary() == NULL) {
return 0;
-#endif
+ } else {
+ return dictionary()->total_size();
}
+}
void print_on(outputStream* st) const;
};
@@ -600,7 +444,6 @@ class VirtualSpaceList : public CHeapObj<mtClass> {
};
};
-
class Metadebug : AllStatic {
// Debugging support for Metaspaces
static int _deallocate_block_a_lot_count;
@@ -655,7 +498,7 @@ class SpaceManager : public CHeapObj<mtClass> {
// List of chunks in use by this SpaceManager. Allocations
// are done from the current chunk. The list is used for deallocating
// chunks when the SpaceManager is freed.
- Metachunk* _chunks_in_use[NumberOfFreeLists];
+ Metachunk* _chunks_in_use[NumberOfInUseLists];
Metachunk* _current_chunk;
// Virtual space where allocation comes from.
@@ -700,24 +543,6 @@ class SpaceManager : public CHeapObj<mtClass> {
// Add chunk to the list of chunks in use
void add_chunk(Metachunk* v, bool make_current);
- // Debugging support
- void verify_chunks_in_use_index(ChunkIndex index, Metachunk* v) {
- switch (index) {
- case 0:
- assert(v->word_size() == SmallChunk, "Not a SmallChunk");
- break;
- case 1:
- assert(v->word_size() == MediumChunk, "Not a MediumChunk");
- break;
- case 2:
- assert(v->word_size() > MediumChunk, "Not a HumongousChunk");
- break;
- default:
- assert(false, "Wrong list.");
- }
- }
-
- protected:
Mutex* lock() const { return _lock; }
public:
@@ -751,10 +576,10 @@ class SpaceManager : public CHeapObj<mtClass> {
MetaWord* allocate(size_t word_size);
// Helper for allocations
- Metablock* allocate_work(size_t word_size);
+ MetaWord* allocate_work(size_t word_size);
// Returns a block to the per manager freelist
- void deallocate(MetaWord* p);
+ void deallocate(MetaWord* p, size_t word_size);
// Based on the allocation size and a minimum chunk size,
// returned chunk size (for expanding space for chunk allocation).
@@ -763,7 +588,7 @@ class SpaceManager : public CHeapObj<mtClass> {
// Called when an allocation from the current chunk fails.
// Gets a new chunk (may require getting a new virtual space),
// and allocates from that chunk.
- Metablock* grow_and_allocate(size_t word_size);
+ MetaWord* grow_and_allocate(size_t word_size);
// debugging support.
@@ -780,6 +605,8 @@ class SpaceManager : public CHeapObj<mtClass> {
uint const SpaceManager::_small_chunk_limit = 4;
+
+
const char* SpaceManager::_expand_lock_name =
"SpaceManager chunk allocation lock";
const int SpaceManager::_expand_lock_rank = Monitor::leaf - 1;
@@ -788,39 +615,26 @@ Mutex* const SpaceManager::_expand_lock =
SpaceManager::_expand_lock_name,
Mutex::_allow_vm_block_flag);
-#ifdef ASSERT
-size_t Metablock::_overhead =
- Chunk::aligned_overhead_size(sizeof(Metablock)) / BytesPerWord;
-#else
-size_t Metablock::_overhead = 0;
-#endif
size_t Metachunk::_overhead =
Chunk::aligned_overhead_size(sizeof(Metachunk)) / BytesPerWord;
// New blocks returned by the Metaspace are zero initialized.
// We should fix the constructors to not assume this instead.
Metablock* Metablock::initialize(MetaWord* p, size_t word_size) {
+ if (p == NULL) {
+ return NULL;
+ }
+
Metablock* result = (Metablock*) p;
// Clear the memory
Copy::fill_to_aligned_words((HeapWord*)result, word_size);
#ifdef ASSERT
result->set_word_size(word_size);
- // Check after work size is set.
- result->set_leader((void*) metaspace_allocation_leader);
- result->set_trailer((void*) metaspace_allocation_trailer);
#endif
return result;
}
-void Metablock::verify() {
-#ifdef ASSERT
- assert(leader() == metaspace_allocation_leader &&
- trailer() == metaspace_allocation_trailer,
- "block has been corrupted");
-#endif
-}
-
// Metachunk methods
Metachunk* Metachunk::initialize(MetaWord* ptr, size_t word_size) {
@@ -843,18 +657,13 @@ Metachunk* Metachunk::initialize(MetaWord* ptr, size_t word_size) {
}
-Metablock* Metachunk::allocate(size_t word_size) {
- Metablock* result = NULL;
+MetaWord* Metachunk::allocate(size_t word_size) {
+ MetaWord* result = NULL;
// If available, bump the pointer to allocate.
if (free_word_size() >= word_size) {
- result = Metablock::initialize(_top, word_size);
+ result = _top;
_top = _top + word_size;
}
-#ifdef ASSERT
- assert(result == NULL ||
- result->word_size() == word_size,
- "Block size is not set correctly");
-#endif
return result;
}
@@ -878,103 +687,85 @@ void Metachunk::print_on(outputStream* st) const {
bottom(), top(), end(), word_size());
}
+#ifdef ASSERT
+void Metachunk::mangle() {
+  // Only the payload is overwritten with the recognizable fill pattern;
+  // the header words at the bottom of the chunk, which hold the list
+  // links, are skipped.
+  const size_t header_words = overhead();
+  HeapWord* const payload = (HeapWord*)(bottom() + header_words);
+  const size_t payload_words = capacity_word_size() - header_words;
+  Copy::fill_to_words(payload, payload_words, metadata_chunk_initialize);
+}
+#endif // ASSERT
void Metachunk::verify() {
#ifdef ASSERT
// Cannot walk through the blocks unless the blocks have
// headers with sizes.
- MetaWord* curr = bottom() + overhead();
- while (curr < top()) {
- Metablock* block = (Metablock*) curr;
- size_t word_size = block->word_size();
- block->verify();
- curr = curr + word_size;
- }
+ assert(_bottom <= _top &&
+ _top <= _end,
+ "Chunk has been smashed");
+ assert(SpaceManager::is_humongous(_word_size) ||
+ _word_size == SpaceManager::MediumChunk ||
+ _word_size == SpaceManager::SmallChunk,
+ "Chunk size is wrong");
#endif
return;
}
// BlockFreelist methods
-#ifdef DEALLOCATE_BLOCKS
BlockFreelist::BlockFreelist() : _dictionary(NULL) {}
-#else
-BlockFreelist::BlockFreelist() {}
-#endif
BlockFreelist::~BlockFreelist() {
-#ifdef DEALLOCATE_BLOCKS
if (_dictionary != NULL) {
if (Verbose && TraceMetadataChunkAllocation) {
_dictionary->print_free_lists(gclog_or_tty);
}
delete _dictionary;
}
-#endif
}
-Metablock* BlockFreelist::initialize_free_chunk(Metablock* block, size_t word_size) {
-#ifdef DEALLOCATE_BLOCKS
-#ifdef ASSERT
- assert(word_size = block->word_size(), "Wrong chunk size");
-#endif
- Metablock* result = block;
- result->setSize(word_size);
- result->linkPrev(NULL);
- result->linkNext(NULL);
+Metablock* BlockFreelist::initialize_free_chunk(MetaWord* p, size_t word_size) {
+ Metablock* block = (Metablock*) p;
+ block->set_word_size(word_size);
+ block->set_prev(NULL);
+ block->set_next(NULL);
- return result;
-#else
- ShouldNotReachHere();
return block;
-#endif
}
-void BlockFreelist::return_block(Metablock* block, size_t word_size) {
-#ifdef ASSERT
- assert(word_size = block->word_size(), "Block size is wrong");;
-#endif
- Metablock* free_chunk = initialize_free_chunk(block, word_size);
-#ifdef DEALLOCATE_BLOCKS
+void BlockFreelist::return_block(MetaWord* p, size_t word_size) {
+ Metablock* free_chunk = initialize_free_chunk(p, word_size);
if (dictionary() == NULL) {
- _dictionary = new BinaryTreeDictionary<Metablock>(false /* adaptive_freelists */);
+ _dictionary = new BlockTreeDictionary();
}
- dictionary()->returnChunk(free_chunk);
-#endif
+ dictionary()->return_chunk(free_chunk);
}
-Metablock* BlockFreelist::get_block(size_t word_size) {
-#ifdef DEALLOCATE_BLOCKS
+MetaWord* BlockFreelist::get_block(size_t word_size) {
if (dictionary() == NULL) {
return NULL;
}
- Metablock* free_chunk =
- dictionary()->getChunk(word_size, FreeBlockDictionary<Metablock>::exactly);
-#else
- Metablock* free_chunk = NULL;
-#endif
- if (free_chunk == NULL) {
+ if (word_size < TreeChunk<Metablock, FreeList>::min_size()) {
+ // Dark matter. Too small for dictionary.
return NULL;
}
- assert(free_chunk->word_size() == word_size, "Size of chunk is incorrect");
- Metablock* block = Metablock::initialize((MetaWord*) free_chunk, word_size);
-#ifdef ASSERT
- assert(block->word_size() == word_size, "Block size is not set correctly");
-#endif
- return block;
+ Metablock* free_block =
+ dictionary()->get_chunk(word_size, FreeBlockDictionary<Metablock>::exactly);
+ if (free_block == NULL) {
+ return NULL;
+ }
+
+ return (MetaWord*) free_block;
}
void BlockFreelist::print_on(outputStream* st) const {
-#ifdef DEALLOCATE_BLOCKS
if (dictionary() == NULL) {
return;
}
dictionary()->print_free_lists(st);
-#else
- return;
-#endif
}
// VirtualSpaceNode methods
@@ -1597,14 +1388,11 @@ void Metadebug::deallocate_block_a_lot(SpaceManager* sm,
Metadebug::deallocate_block_a_lot_count() % MetaDataDeallocateALotInterval == 0 ) {
Metadebug::set_deallocate_block_a_lot_count(0);
for (uint i = 0; i < metadata_deallocate_a_lot_block; i++) {
- Metablock* dummy_block = sm->allocate_work(raw_word_size);
+ MetaWord* dummy_block = sm->allocate_work(raw_word_size);
if (dummy_block == 0) {
break;
}
-#ifdef ASSERT
- assert(dummy_block->word_size() == raw_word_size, "Block size is not set correctly");
-#endif
- sm->deallocate(dummy_block->data());
+ sm->deallocate(dummy_block, raw_word_size);
}
} else {
Metadebug::inc_deallocate_block_a_lot_count();
@@ -1784,8 +1572,8 @@ void ChunkManager::verify() {
}
void ChunkManager::locked_verify() {
- locked_verify_free_chunks_total();
locked_verify_free_chunks_count();
+ locked_verify_free_chunks_total();
}
void ChunkManager::locked_print_free_chunks(outputStream* st) {
@@ -1803,7 +1591,6 @@ ChunkList* ChunkManager::free_chunks(ChunkIndex index) {
return &_free_chunks[index];
}
-
// These methods that sum the free chunk lists are used in printing
// methods that are used in product builds.
size_t ChunkManager::sum_free_chunks() {
@@ -1818,6 +1605,7 @@ size_t ChunkManager::sum_free_chunks() {
result = result + list->sum_list_capacity();
}
+ result = result + humongous_dictionary()->total_size();
return result;
}
@@ -1831,6 +1619,7 @@ size_t ChunkManager::sum_free_chunks_count() {
}
count = count + list->sum_list_count();
}
+ count = count + humongous_dictionary()->total_free_blocks();
return count;
}
@@ -1875,23 +1664,24 @@ Metachunk* ChunkManager::free_chunks_get(size_t word_size) {
assert_lock_strong(SpaceManager::expand_lock());
locked_verify();
- ChunkList* free_list = find_free_chunks_list(word_size);
- assert(free_list != NULL, "Sanity check");
- Metachunk* chunk = free_list->head();
- debug_only(Metachunk* debug_head = chunk;)
+ Metachunk* chunk = NULL;
+ if (!SpaceManager::is_humongous(word_size)) {
+ ChunkList* free_list = find_free_chunks_list(word_size);
+ assert(free_list != NULL, "Sanity check");
- if (chunk == NULL) {
- return NULL;
- }
+ chunk = free_list->head();
+ debug_only(Metachunk* debug_head = chunk;)
+
+ if (chunk == NULL) {
+ return NULL;
+ }
- Metachunk* prev_chunk = chunk;
- if (chunk->word_size() == word_size) {
- // Chunk is being removed from the chunks free list.
- dec_free_chunks_total(chunk->capacity_word_size());
// Remove the chunk as the head of the list.
free_list->set_head(chunk->next());
chunk->set_next(NULL);
+ // Chunk has been removed from the chunks free list.
+ dec_free_chunks_total(chunk->capacity_word_size());
if (TraceMetadataChunkAllocation && Verbose) {
tty->print_cr("ChunkManager::free_chunks_get: free_list "
@@ -1899,79 +1689,24 @@ Metachunk* ChunkManager::free_chunks_get(size_t word_size) {
free_list, chunk, chunk->word_size());
}
} else {
- assert(SpaceManager::is_humongous(word_size),
- "Should only need to check humongous");
- // This code to find the best fit is just for purposes of
- // investigating the loss due to fragmentation on a humongous
- // chunk. It will be replace by a binaryTreeDictionary for
- // the humongous chunks.
- uint count = 0;
- Metachunk* best_fit = NULL;
- Metachunk* best_fit_prev = NULL;
- while (chunk != NULL) {
- count++;
- if (chunk->word_size() < word_size) {
- prev_chunk = chunk;
- chunk = chunk->next();
- } else if (chunk->word_size() == word_size) {
- break;
- } else {
- if (best_fit == NULL ||
- best_fit->word_size() > chunk->word_size()) {
- best_fit_prev = prev_chunk;
- best_fit = chunk;
- }
- prev_chunk = chunk;
- chunk = chunk->next();
+ chunk = humongous_dictionary()->get_chunk(
+ word_size,
+ FreeBlockDictionary<Metachunk>::atLeast);
+
+ if (chunk != NULL) {
+ if (TraceMetadataHumongousAllocation) {
+ size_t waste = chunk->word_size() - word_size;
+ tty->print_cr("Free list allocate humongous chunk size " SIZE_FORMAT
+ " for requested size " SIZE_FORMAT
+ " waste " SIZE_FORMAT,
+ chunk->word_size(), word_size, waste);
}
+ // Chunk is being removed from the chunks free list.
+ dec_free_chunks_total(chunk->capacity_word_size());
+#ifdef ASSERT
+ chunk->set_is_free(false);
+#endif
}
- if (chunk == NULL) {
- prev_chunk = best_fit_prev;
- chunk = best_fit;
- }
- if (chunk != NULL) {
- if (TraceMetadataHumongousAllocation) {
- size_t waste = chunk->word_size() - word_size;
- tty->print_cr("Free list allocate humongous chunk size " SIZE_FORMAT
- " for requested size " SIZE_FORMAT
- " waste " SIZE_FORMAT
- " found at " SIZE_FORMAT " of " SIZE_FORMAT,
- chunk->word_size(), word_size, waste,
- count, free_list->sum_list_count());
- }
- // Chunk is being removed from the chunks free list.
- dec_free_chunks_total(chunk->capacity_word_size());
- // Remove the chunk if it is at the head of the list.
- if (chunk == free_list->head()) {
- free_list->set_head(chunk->next());
-
- if (TraceMetadataHumongousAllocation) {
- tty->print_cr("ChunkManager::free_chunks_get: humongous free_list "
- PTR_FORMAT " chunk " PTR_FORMAT " size " SIZE_FORMAT
- " new head " PTR_FORMAT,
- free_list, chunk, chunk->word_size(),
- free_list->head());
- }
- } else {
- // Remove a chunk in the interior of the list
- prev_chunk->set_next(chunk->next());
-
- if (TraceMetadataHumongousAllocation) {
- tty->print_cr("ChunkManager::free_chunks_get: humongous free_list "
- PTR_FORMAT " chunk " PTR_FORMAT " size " SIZE_FORMAT
- PTR_FORMAT " prev " PTR_FORMAT " next " PTR_FORMAT,
- free_list, chunk, chunk->word_size(),
- prev_chunk, chunk->next());
- }
- }
- chunk->set_next(NULL);
- } else {
- if (TraceMetadataHumongousAllocation) {
- tty->print_cr("ChunkManager::free_chunks_get: New humongous chunk of size "
- SIZE_FORMAT,
- word_size);
- }
- }
}
locked_verify();
return chunk;
@@ -2000,12 +1735,18 @@ Metachunk* ChunkManager::chunk_freelist_allocate(size_t word_size) {
return chunk;
}
+// Reports the statistics of the humongous chunk dictionary, but only when
+// PrintFLSStatistics is non-zero.  The stream argument is not used here.
+void ChunkManager::print_on(outputStream* out) {
+  if (PrintFLSStatistics == 0) {
+    return;
+  }
+  humongous_dictionary()->report_statistics();
+}
+
// SpaceManager methods
size_t SpaceManager::sum_free_in_chunks_in_use() const {
MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
size_t free = 0;
- for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+ for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
Metachunk* chunk = chunks_in_use(i);
while (chunk != NULL) {
free += chunk->free_word_size();
@@ -2018,11 +1759,12 @@ size_t SpaceManager::sum_free_in_chunks_in_use() const {
size_t SpaceManager::sum_waste_in_chunks_in_use() const {
MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
size_t result = 0;
- for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
- // Count the free space in all the chunk but not the
- // current chunk from which allocations are still being done.
+ for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
+
+
result += sum_waste_in_chunks_in_use(i);
}
+
return result;
}
@@ -2033,10 +1775,10 @@ size_t SpaceManager::sum_waste_in_chunks_in_use(ChunkIndex index) const {
// Count the free space in all the chunk but not the
// current chunk from which allocations are still being done.
if (chunk != NULL) {
- while (chunk != NULL) {
- if (chunk != current_chunk()) {
- result += chunk->free_word_size();
- }
+ Metachunk* prev = chunk;
+ while (chunk != NULL && chunk != current_chunk()) {
+ result += chunk->free_word_size();
+ prev = chunk;
chunk = chunk->next();
count++;
}
@@ -2047,7 +1789,7 @@ size_t SpaceManager::sum_waste_in_chunks_in_use(ChunkIndex index) const {
size_t SpaceManager::sum_capacity_in_chunks_in_use() const {
MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
size_t sum = 0;
- for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+ for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
Metachunk* chunk = chunks_in_use(i);
while (chunk != NULL) {
// Just changed this sum += chunk->capacity_word_size();
@@ -2061,9 +1803,10 @@ size_t SpaceManager::sum_capacity_in_chunks_in_use() const {
size_t SpaceManager::sum_count_in_chunks_in_use() {
size_t count = 0;
- for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+ for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
count = count + sum_count_in_chunks_in_use(i);
}
+
return count;
}
@@ -2081,7 +1824,7 @@ size_t SpaceManager::sum_count_in_chunks_in_use(ChunkIndex i) {
size_t SpaceManager::sum_used_in_chunks_in_use() const {
MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
size_t used = 0;
- for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+ for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
Metachunk* chunk = chunks_in_use(i);
while (chunk != NULL) {
used += chunk->used_word_size();
@@ -2139,15 +1882,13 @@ size_t SpaceManager::calc_chunk_size(size_t word_size) {
gclog_or_tty->print_cr(" word_size " PTR_FORMAT, word_size);
gclog_or_tty->print_cr(" chunk_word_size " PTR_FORMAT,
chunk_word_size);
- gclog_or_tty->print_cr(" block overhead " PTR_FORMAT
- " chunk overhead " PTR_FORMAT,
- Metablock::overhead(),
+ gclog_or_tty->print_cr(" chunk overhead " PTR_FORMAT,
Metachunk::overhead());
}
return chunk_word_size;
}
-Metablock* SpaceManager::grow_and_allocate(size_t word_size) {
+MetaWord* SpaceManager::grow_and_allocate(size_t word_size) {
assert(vs_list()->current_virtual_space() != NULL,
"Should have been set");
assert(current_chunk() == NULL ||
@@ -2180,7 +1921,7 @@ Metablock* SpaceManager::grow_and_allocate(size_t word_size) {
void SpaceManager::print_on(outputStream* st) const {
for (ChunkIndex i = SmallIndex;
- i < NumberOfFreeLists ;
+ i < NumberOfInUseLists ;
i = next_chunk_index(i) ) {
st->print_cr(" chunks_in_use " PTR_FORMAT " chunk size " PTR_FORMAT,
chunks_in_use(i),
@@ -2191,8 +1932,11 @@ void SpaceManager::print_on(outputStream* st) const {
sum_waste_in_chunks_in_use(SmallIndex),
sum_waste_in_chunks_in_use(MediumIndex),
sum_waste_in_chunks_in_use(HumongousIndex));
- // Nothing in them yet
- // block_freelists()->print_on(st);
+ // block free lists
+ if (block_freelists() != NULL) {
+ st->print_cr("total in block free lists " SIZE_FORMAT,
+ block_freelists()->total_size());
+ }
}
SpaceManager::SpaceManager(Mutex* lock, VirtualSpaceList* vs_list) :
@@ -2200,7 +1944,7 @@ SpaceManager::SpaceManager(Mutex* lock, VirtualSpaceList* vs_list) :
_allocation_total(0),
_lock(lock) {
Metadebug::init_allocation_fail_alot_count();
- for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+ for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
_chunks_in_use[i] = NULL;
}
_current_chunk = NULL;
@@ -2262,22 +2006,24 @@ SpaceManager::~SpaceManager() {
// Humongous chunks are never the current chunk.
Metachunk* humongous_chunks = chunks_in_use(HumongousIndex);
- if (humongous_chunks != NULL) {
- chunk_manager->free_humongous_chunks()->add_at_head(humongous_chunks);
- set_chunks_in_use(HumongousIndex, NULL);
+ while (humongous_chunks != NULL) {
+#ifdef ASSERT
+ humongous_chunks->set_is_free(true);
+#endif
+ Metachunk* next_humongous_chunks = humongous_chunks->next();
+ chunk_manager->humongous_dictionary()->return_chunk(humongous_chunks);
+ humongous_chunks = next_humongous_chunks;
}
+ set_chunks_in_use(HumongousIndex, NULL);
chunk_manager->locked_verify();
}
-void SpaceManager::deallocate(MetaWord* p) {
+void SpaceManager::deallocate(MetaWord* p, size_t word_size) {
assert_lock_strong(_lock);
- ShouldNotReachHere(); // Where is this needed.
-#ifdef DEALLOCATE_BLOCKS
- Metablock* block = Metablock::metablock_from_data(p);
- // This is expense but kept it until integration JJJ
- assert(contains((address)block), "Block does not belong to this metaspace");
- block_freelists()->return_block(block, word_size);
-#endif
+ size_t min_size = TreeChunk<Metablock, FreeList>::min_size();
+ assert(word_size >= min_size,
+ err_msg("Should not deallocate dark matter " SIZE_FORMAT, word_size));
+ block_freelists()->return_block(p, word_size);
}
// Adds a chunk to the list of chunks in use.
@@ -2366,50 +2112,40 @@ void SpaceManager::add_chunk(Metachunk* new_chunk, bool make_current) {
MetaWord* SpaceManager::allocate(size_t word_size) {
MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
- size_t block_overhead = Metablock::overhead();
// If only the dictionary is going to be used (i.e., no
// indexed free list), then there is a minimum size requirement.
// MinChunkSize is a placeholder for the real minimum size JJJ
- size_t byte_size_with_overhead = (word_size + block_overhead) * BytesPerWord;
-#ifdef DEALLOCATE_BLOCKS
- size_t raw_bytes_size = MAX2(ARENA_ALIGN(byte_size_with_overhead),
- MinChunkSize * BytesPerWord);
-#else
- size_t raw_bytes_size = ARENA_ALIGN(byte_size_with_overhead);
-#endif
+ size_t byte_size = word_size * BytesPerWord;
+
+ size_t byte_size_with_overhead = byte_size + Metablock::overhead();
+
+ size_t raw_bytes_size = MAX2(byte_size_with_overhead,
+ Metablock::min_block_byte_size());
+ raw_bytes_size = ARENA_ALIGN(raw_bytes_size);
size_t raw_word_size = raw_bytes_size / BytesPerWord;
assert(raw_word_size * BytesPerWord == raw_bytes_size, "Size problem");
BlockFreelist* fl = block_freelists();
- Metablock* block = NULL;
+ MetaWord* p = NULL;
// Allocation from the dictionary is expensive in the sense that
// the dictionary has to be searched for a size. Don't allocate
// from the dictionary until it starts to get fat. Is this
// a reasonable policy? Maybe an skinny dictionary is fast enough
// for allocations. Do some profiling. JJJ
- if (fl->totalSize() > allocation_from_dictionary_limit) {
- block = fl->get_block(raw_word_size);
+ if (fl->total_size() > allocation_from_dictionary_limit) {
+ p = fl->get_block(raw_word_size);
}
- if (block == NULL) {
- block = allocate_work(raw_word_size);
- if (block == NULL) {
- return NULL;
- }
+ if (p == NULL) {
+ p = allocate_work(raw_word_size);
}
Metadebug::deallocate_block_a_lot(this, raw_word_size);
- // Push the allocation past the word containing the size and leader.
-#ifdef ASSERT
- MetaWord* result = block->data();
- return result;
-#else
- return (MetaWord*) block;
-#endif
+ return p;
}
// Returns the address of spaced allocated for "word_size".
// This methods does not know about blocks (Metablocks)
-Metablock* SpaceManager::allocate_work(size_t word_size) {
+MetaWord* SpaceManager::allocate_work(size_t word_size) {
assert_lock_strong(_lock);
#ifdef ASSERT
if (Metadebug::test_metadata_failure()) {
@@ -2417,7 +2153,7 @@ Metablock* SpaceManager::allocate_work(size_t word_size) {
}
#endif
// Is there space in the current chunk?
- Metablock* result = NULL;
+ MetaWord* result = NULL;
// For DumpSharedSpaces, only allocate out of the current chunk which is
// never null because we gave it the size we wanted. Caller reports out
@@ -2436,8 +2172,8 @@ Metablock* SpaceManager::allocate_work(size_t word_size) {
}
if (result > 0) {
inc_allocation_total(word_size);
- assert(result != (Metablock*) chunks_in_use(MediumIndex), "Head of the list is being allocated");
- assert(result->word_size() == word_size, "Size not set correctly");
+ assert(result != (MetaWord*) chunks_in_use(MediumIndex),
+ "Head of the list is being allocated");
}
return result;
@@ -2447,13 +2183,13 @@ void SpaceManager::verify() {
// If there are blocks in the dictionary, then
// verfication of chunks does not work since
// being in the dictionary alters a chunk.
- if (block_freelists()->totalSize() == 0) {
+ if (block_freelists()->total_size() == 0) {
// Skip the small chunks because their next link points to
// medium chunks. This is because the small chunk is the
// current chunk (for allocations) until it is full and the
// the addition of the next chunk does not NULL the next
// like of the small chunk.
- for (ChunkIndex i = MediumIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+ for (ChunkIndex i = MediumIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
Metachunk* curr = chunks_in_use(i);
while (curr != NULL) {
curr->verify();
@@ -2492,7 +2228,7 @@ void SpaceManager::dump(outputStream* const out) const {
// Add up statistics for all chunks in this SpaceManager.
for (ChunkIndex index = SmallIndex;
- index < NumberOfFreeLists;
+ index < NumberOfInUseLists;
index = next_chunk_index(index)) {
for (Metachunk* curr = chunks_in_use(index);
curr != NULL;
@@ -2521,7 +2257,7 @@ void SpaceManager::dump(outputStream* const out) const {
#ifdef ASSERT
void SpaceManager::mangle_freed_chunks() {
for (ChunkIndex index = SmallIndex;
- index < NumberOfFreeLists;
+ index < NumberOfInUseLists;
index = next_chunk_index(index)) {
for (Metachunk* curr = chunks_in_use(index);
curr != NULL;
@@ -2833,13 +2569,12 @@ void Metaspace::initialize(Mutex* lock, size_t initial_size) {
}
}
-
MetaWord* Metaspace::allocate(size_t word_size, MetadataType mdtype) {
// DumpSharedSpaces doesn't use class metadata area (yet)
if (mdtype == ClassType && !DumpSharedSpaces) {
- return class_vsm()->allocate(word_size);
+ return class_vsm()->allocate(word_size);
} else {
- return vsm()->allocate(word_size);
+ return vsm()->allocate(word_size);
}
}
@@ -2853,6 +2588,7 @@ MetaWord* Metaspace::expand_and_allocate(size_t word_size, MetadataType mdtype)
gclog_or_tty->print_cr("Increase capacity to GC from " SIZE_FORMAT
" to " SIZE_FORMAT, before_inc, MetaspaceGC::capacity_until_GC());
}
+
result = allocate(word_size, mdtype);
return result;
@@ -2889,37 +2625,39 @@ size_t Metaspace::capacity_words(MetadataType mdtype) const {
void Metaspace::deallocate(MetaWord* ptr, size_t word_size, bool is_class) {
if (SafepointSynchronize::is_at_safepoint()) {
assert(Thread::current()->is_VM_thread(), "should be the VM thread");
- // Don't take lock
-#ifdef DEALLOCATE_BLOCKS
+ // Don't take Heap_lock
+ MutexLocker ml(vsm()->lock());
+ if (word_size < TreeChunk<Metablock, FreeList>::min_size()) {
+ // Dark matter. Too small for dictionary.
+#ifdef ASSERT
+ Copy::fill_to_words((HeapWord*)ptr, word_size, 0xf5f5f5f5);
+#endif
+ return;
+ }
if (is_class) {
- class_vsm()->deallocate(ptr);
+ class_vsm()->deallocate(ptr, word_size);
} else {
- vsm()->deallocate(ptr);
+ vsm()->deallocate(ptr, word_size);
}
-#else
-#ifdef ASSERT
- Copy::fill_to_words((HeapWord*)ptr, word_size, metadata_deallocate);
-#endif
-#endif
-
} else {
MutexLocker ml(vsm()->lock());
-#ifdef DEALLOCATE_BLOCKS
+ if (word_size < TreeChunk<Metablock, FreeList>::min_size()) {
+ // Dark matter. Too small for dictionary.
+#ifdef ASSERT
+ Copy::fill_to_words((HeapWord*)ptr, word_size, 0xf5f5f5f5);
+#endif
+ return;
+ }
if (is_class) {
- class_vsm()->deallocate(ptr);
+ class_vsm()->deallocate(ptr, word_size);
} else {
- vsm()->deallocate(ptr);
+ vsm()->deallocate(ptr, word_size);
}
-#else
-#ifdef ASSERT
- Copy::fill_to_words((HeapWord*)ptr, word_size, metadata_deallocate);
-#endif
-#endif
}
}
-MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
+Metablock* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
bool read_only, MetadataType mdtype, TRAPS) {
if (HAS_PENDING_EXCEPTION) {
assert(false, "Should not allocate with exception pending");
@@ -2943,7 +2681,7 @@ MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
if (result == NULL) {
report_out_of_shared_space(read_only ? SharedReadOnly : SharedReadWrite);
}
- return result;
+ return Metablock::initialize(result, word_size);
}
result = loader_data->metaspace_non_null()->allocate(word_size, mdtype);
@@ -2951,7 +2689,7 @@ MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
if (result == NULL) {
// Try to clean out some memory and retry.
result =
- Universe::heap()->collector_policy()->satisfy_failed_metadata_allocation(
+ Universe::heap()->collector_policy()->satisfy_failed_metadata_allocation(
loader_data, word_size, mdtype);
// If result is still null, we are out of memory.
@@ -2967,7 +2705,7 @@ MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
THROW_OOP_0(Universe::out_of_memory_error_perm_gen());
}
}
- return result;
+ return Metablock::initialize(result, word_size);
}
void Metaspace::print_on(outputStream* out) const {
diff --git a/src/share/vm/memory/metaspace.hpp b/src/share/vm/memory/metaspace.hpp
index 169a3b71d..c42a979cb 100644
--- a/src/share/vm/memory/metaspace.hpp
+++ b/src/share/vm/memory/metaspace.hpp
@@ -57,12 +57,10 @@
//
class ClassLoaderData;
+class Metablock;
class MetaWord;
class Mutex;
class outputStream;
-class FreeChunk;
-template <class Chunk_t> class FreeList;
-template <class Chunk_t> class BinaryTreeDictionary;
class SpaceManager;
// Metaspaces each have a SpaceManager and allocations
@@ -128,7 +126,7 @@ class Metaspace : public CHeapObj<mtClass> {
size_t capacity_words(MetadataType mdtype) const;
size_t waste_words(MetadataType mdtype) const;
- static MetaWord* allocate(ClassLoaderData* loader_data, size_t size,
+ static Metablock* allocate(ClassLoaderData* loader_data, size_t size,
bool read_only, MetadataType mdtype, TRAPS);
void deallocate(MetaWord* ptr, size_t byte_size, bool is_class);
diff --git a/src/share/vm/memory/metaspaceShared.cpp b/src/share/vm/memory/metaspaceShared.cpp
index f69e7586e..dde26b5f8 100644
--- a/src/share/vm/memory/metaspaceShared.cpp
+++ b/src/share/vm/memory/metaspaceShared.cpp
@@ -663,8 +663,8 @@ bool MetaspaceShared::is_in_shared_space(const void* p) {
if (_ro_base == NULL || _rw_base == NULL) {
return false;
} else {
- return ((p > _ro_base && p < (_ro_base + SharedReadOnlySize)) ||
- (p > _rw_base && p < (_rw_base + SharedReadWriteSize)));
+ return ((p >= _ro_base && p < (_ro_base + SharedReadOnlySize)) ||
+ (p >= _rw_base && p < (_rw_base + SharedReadWriteSize)));
}
}
@@ -693,14 +693,6 @@ bool MetaspaceShared::map_shared_spaces(FileMapInfo* mapinfo) {
ReservedSpace shared_rs = mapinfo->reserve_shared_memory();
if (!shared_rs.is_reserved()) return false;
- // Split reserved memory into pieces (windows needs this)
- ReservedSpace ro_rs = shared_rs.first_part(SharedReadOnlySize);
- ReservedSpace tmp_rs1 = shared_rs.last_part(SharedReadOnlySize);
- ReservedSpace rw_rs = tmp_rs1.first_part(SharedReadWriteSize);
- ReservedSpace tmp_rs2 = tmp_rs1.last_part(SharedReadWriteSize);
- ReservedSpace md_rs = tmp_rs2.first_part(SharedMiscDataSize);
- ReservedSpace mc_rs = tmp_rs2.last_part(SharedMiscDataSize);
-
// Map each shared region
if ((_ro_base = mapinfo->map_region(ro)) != NULL &&
(_rw_base = mapinfo->map_region(rw)) != NULL &&
diff --git a/src/share/vm/memory/resourceArea.hpp b/src/share/vm/memory/resourceArea.hpp
index f42c0f624..f1418ce19 100644
--- a/src/share/vm/memory/resourceArea.hpp
+++ b/src/share/vm/memory/resourceArea.hpp
@@ -127,15 +127,21 @@ protected:
void reset_to_mark() {
if (UseMallocOnly) free_malloced_objects();
- if( _chunk->next() ) // Delete later chunks
+ if( _chunk->next() ) { // Delete later chunks
+ // reset arena size before delete chunks. Otherwise, the total
+ // arena size could exceed total chunk size
+ assert(_area->size_in_bytes() > size_in_bytes(), "Sanity check");
+ _area->set_size_in_bytes(size_in_bytes());
_chunk->next_chop();
+ } else {
+ assert(_area->size_in_bytes() == size_in_bytes(), "Sanity check");
+ }
_area->_chunk = _chunk; // Roll back arena to saved chunk
_area->_hwm = _hwm;
_area->_max = _max;
// clear out this chunk (to detect allocation bugs)
if (ZapResourceArea) memset(_hwm, badResourceValue, _max - _hwm);
- _area->set_size_in_bytes(size_in_bytes());
}
~ResourceMark() {
@@ -219,15 +225,21 @@ protected:
void reset_to_mark() {
if (UseMallocOnly) free_malloced_objects();
- if( _chunk->next() ) // Delete later chunks
+ if( _chunk->next() ) { // Delete later chunks
+ // reset arena size before delete chunks. Otherwise, the total
+ // arena size could exceed total chunk size
+ assert(_area->size_in_bytes() > size_in_bytes(), "Sanity check");
+ _area->set_size_in_bytes(size_in_bytes());
_chunk->next_chop();
+ } else {
+ assert(_area->size_in_bytes() == size_in_bytes(), "Sanity check");
+ }
_area->_chunk = _chunk; // Roll back arena to saved chunk
_area->_hwm = _hwm;
_area->_max = _max;
// clear out this chunk (to detect allocation bugs)
if (ZapResourceArea) memset(_hwm, badResourceValue, _max - _hwm);
- _area->set_size_in_bytes(size_in_bytes());
}
~DeoptResourceMark() {
diff --git a/src/share/vm/oops/method.cpp b/src/share/vm/oops/method.cpp
index 5a1032f77..9849829ea 100644
--- a/src/share/vm/oops/method.cpp
+++ b/src/share/vm/oops/method.cpp
@@ -1155,8 +1155,12 @@ methodHandle Method::clone_with_new_data(methodHandle m, u_char* new_code, int n
vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) {
// if loader is not the default loader (i.e., != NULL), we can't know the intrinsics
// because we are not loading from core libraries
- if (InstanceKlass::cast(holder)->class_loader() != NULL)
+ // exception: the AES intrinsics come from lib/ext/sunjce_provider.jar
+ // which does not use the class default class loader so we check for its loader here
+ if ((InstanceKlass::cast(holder)->class_loader() != NULL) &&
+ InstanceKlass::cast(holder)->class_loader()->klass()->name() != vmSymbols::sun_misc_Launcher_ExtClassLoader()) {
return vmSymbols::NO_SID; // regardless of name, no intrinsics here
+ }
// see if the klass name is well-known:
Symbol* klass_name = InstanceKlass::cast(holder)->name();
diff --git a/src/share/vm/opto/c2_globals.hpp b/src/share/vm/opto/c2_globals.hpp
index 80996a5d7..4fdebf526 100644
--- a/src/share/vm/opto/c2_globals.hpp
+++ b/src/share/vm/opto/c2_globals.hpp
@@ -439,6 +439,9 @@
product(bool, DoEscapeAnalysis, true, \
"Perform escape analysis") \
\
+ develop(bool, ExitEscapeAnalysisOnTimeout, true, \
+ "Exit or throw assert in EA when it reaches time limit") \
+ \
notproduct(bool, PrintEscapeAnalysis, false, \
"Print the results of escape analysis") \
\
diff --git a/src/share/vm/opto/callGenerator.cpp b/src/share/vm/opto/callGenerator.cpp
index 547096b3d..93f2b859b 100644
--- a/src/share/vm/opto/callGenerator.cpp
+++ b/src/share/vm/opto/callGenerator.cpp
@@ -670,6 +670,129 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod*
}
+//------------------------PredictedIntrinsicGenerator------------------------------
+// Call generator that pairs an intrinsic with an ordinary call generator.
+// generate() asks the intrinsic for a predicate, runs the ordinary generator
+// on the control returned by that predicate and the intrinsic on the
+// remaining control, and then merges the two paths.
+class PredictedIntrinsicGenerator : public CallGenerator {
+  CallGenerator* _intrinsic;  // supplies generate_predicate() and the intrinsic code
+  CallGenerator* _cg;         // generates the call used on the slow path
+
+public:
+  // The generator is created for the method of the slow-path generator 'cg'.
+  PredictedIntrinsicGenerator(CallGenerator* intrinsic,
+                              CallGenerator* cg)
+    : CallGenerator(cg->method()),
+      _intrinsic(intrinsic),
+      _cg(cg)
+  {
+  }
+
+  // All three queries answer true unconditionally for this generator.
+  virtual bool      is_virtual()   const    { return true; }
+  virtual bool      is_inlined()   const    { return true; }
+  virtual bool      is_intrinsic() const    { return true; }
+
+  virtual JVMState* generate(JVMState* jvms);
+};
+
+
+// Factory: wraps 'intrinsic' and the fallback generator 'cg' in a newly
+// allocated PredictedIntrinsicGenerator and returns it.
+CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic,
+                                                      CallGenerator* cg) {
+  CallGenerator* predicted = new PredictedIntrinsicGenerator(intrinsic, cg);
+  return predicted;
+}
+
+
+// Emits a predicated intrinsic call.  The intrinsic's predicate yields the
+// control for the slow path, on which the ordinary call from _cg is
+// generated; the intrinsic itself is generated on the remaining control.
+// When both paths survive they are joined with a region and phis.
+// Returns NULL if the compilation is failing; otherwise the JVMState obtained
+// from kit.transfer_exceptions_into_jvms() for whichever paths remain.
+JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms) {
+  GraphKit kit(jvms);
+  PhaseGVN& gvn = kit.gvn();
+
+  CompileLog* log = kit.C->log();
+  if (log != NULL) {
+    log->elem("predicted_intrinsic bci='%d' method='%d'",
+              jvms->bci(), log->identify(method()));
+  }
+
+  // Control for the slow path; may be NULL.
+  Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms());
+  if (kit.failing())
+    return NULL;  // might happen because of NodeCountInliningCutoff
+
+  SafePointNode* slow_map = NULL;
+  // Start from NULL: slow_jvms is only assigned when _cg actually runs, but it
+  // is handed to kit.set_jvms() below on paths that do not check for that.
+  JVMState* slow_jvms = NULL;
+  if (slow_ctl != NULL) {
+    PreserveJVMState pjvms(&kit);
+    kit.set_control(slow_ctl);
+    if (!kit.stopped()) {
+      slow_jvms = _cg->generate(kit.sync_jvms());
+      if (kit.failing())
+        return NULL;  // might happen because of NodeCountInliningCutoff
+      assert(slow_jvms != NULL, "must be");
+      kit.add_exception_states_from(slow_jvms);
+      kit.set_map(slow_jvms->map());
+      if (!kit.stopped())
+        slow_map = kit.stop();
+    }
+  }
+
+  if (kit.stopped()) {
+    // Predicate is always false.
+    kit.set_jvms(slow_jvms);
+    return kit.transfer_exceptions_into_jvms();
+  }
+
+  // Generate intrinsic code:
+  JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms());
+  if (new_jvms == NULL) {
+    // Intrinsic failed, so use slow code or make a direct call.
+    if (slow_map == NULL) {
+      CallGenerator* cg = CallGenerator::for_direct_call(method());
+      new_jvms = cg->generate(kit.sync_jvms());
+    } else {
+      kit.set_jvms(slow_jvms);
+      return kit.transfer_exceptions_into_jvms();
+    }
+  }
+  kit.add_exception_states_from(new_jvms);
+  kit.set_jvms(new_jvms);
+
+  // Need to merge slow and fast?
+  if (slow_map == NULL) {
+    // The fast path is the only path remaining.
+    return kit.transfer_exceptions_into_jvms();
+  }
+
+  if (kit.stopped()) {
+    // Intrinsic method threw an exception, so it's just the slow path after all.
+    kit.set_jvms(slow_jvms);
+    return kit.transfer_exceptions_into_jvms();
+  }
+
+  // Finish the diamond.
+  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
+  RegionNode* region = new (kit.C) RegionNode(3);
+  region->init_req(1, kit.control());
+  region->init_req(2, slow_map->control());
+  kit.set_control(gvn.transform(region));
+  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
+  iophi->set_req(2, slow_map->i_o());
+  kit.set_i_o(gvn.transform(iophi));
+  kit.merge_memory(slow_map->merged_memory(), region, 2);
+  uint tos = kit.jvms()->stkoff() + kit.sp();
+  uint limit = slow_map->req();
+  // Merge every map input that differs between the two paths with a phi.
+  for (uint i = TypeFunc::Parms; i < limit; i++) {
+    // Skip unused stack slots; fast forward to monoff();
+    if (i == tos) {
+      i = kit.jvms()->monoff();
+      if( i >= limit ) break;
+    }
+    Node* m = kit.map()->in(i);
+    Node* n = slow_map->in(i);
+    if (m != n) {
+      const Type* t = gvn.type(m)->meet(gvn.type(n));
+      Node* phi = PhiNode::make(region, m, t);
+      phi->set_req(2, n);
+      kit.map()->set_req(i, gvn.transform(phi));
+    }
+  }
+  return kit.transfer_exceptions_into_jvms();
+}
+
//-------------------------UncommonTrapCallGenerator-----------------------------
// Internal class which handles all out-of-line calls checking receiver type.
class UncommonTrapCallGenerator : public CallGenerator {
diff --git a/src/share/vm/opto/callGenerator.hpp b/src/share/vm/opto/callGenerator.hpp
index 3cfd39df6..ae59173bf 100644
--- a/src/share/vm/opto/callGenerator.hpp
+++ b/src/share/vm/opto/callGenerator.hpp
@@ -143,6 +143,9 @@ class CallGenerator : public ResourceObj {
// Registry for intrinsics:
static CallGenerator* for_intrinsic(ciMethod* m);
static void register_intrinsic(ciMethod* m, CallGenerator* cg);
+ static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic,
+ CallGenerator* cg);
+ virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
if (PrintInlining)
diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp
index d870872a9..c4da70ee3 100644
--- a/src/share/vm/opto/compile.cpp
+++ b/src/share/vm/opto/compile.cpp
@@ -3047,9 +3047,9 @@ bool Compile::Constant::operator==(const Constant& other) {
case T_LONG:
case T_DOUBLE: return (_v._value.j == other._v._value.j);
case T_OBJECT:
- case T_METADATA: return (_v._metadata == other._v._metadata);
case T_ADDRESS: return (_v._value.l == other._v._value.l);
case T_VOID: return (_v._value.l == other._v._value.l); // jump-table entries
+ case T_METADATA: return (_v._metadata == other._v._metadata);
default: ShouldNotReachHere();
}
return false;
diff --git a/src/share/vm/opto/compile.hpp b/src/share/vm/opto/compile.hpp
index 6bcf3d3da..44bf27768 100644
--- a/src/share/vm/opto/compile.hpp
+++ b/src/share/vm/opto/compile.hpp
@@ -149,7 +149,7 @@ class Compile : public Phase {
private:
BasicType _type;
union {
- jvalue _value;
+ jvalue _value;
Metadata* _metadata;
} _v;
int _offset; // offset of this constant (in bytes) relative to the constant table base.
diff --git a/src/share/vm/opto/doCall.cpp b/src/share/vm/opto/doCall.cpp
index 30a01f34b..95d148841 100644
--- a/src/share/vm/opto/doCall.cpp
+++ b/src/share/vm/opto/doCall.cpp
@@ -107,7 +107,17 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
// intrinsics handle strict f.p. correctly.
if (allow_inline && allow_intrinsics) {
CallGenerator* cg = find_intrinsic(callee, call_is_virtual);
- if (cg != NULL) return cg;
+ if (cg != NULL) {
+ if (cg->is_predicted()) {
+ // Code without intrinsic but, hopefully, inlined.
+ CallGenerator* inline_cg = this->call_generator(callee,
+ vtable_index, call_is_virtual, jvms, allow_inline, prof_factor, false);
+ if (inline_cg != NULL) {
+ cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg);
+ }
+ }
+ return cg;
+ }
}
// Do method handle calls.
diff --git a/src/share/vm/opto/escape.cpp b/src/share/vm/opto/escape.cpp
index 9fd318050..a5aa47119 100644
--- a/src/share/vm/opto/escape.cpp
+++ b/src/share/vm/opto/escape.cpp
@@ -893,12 +893,16 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
arg_has_oops && (i > TypeFunc::Parms);
#ifdef ASSERT
if (!(is_arraycopy ||
- call->as_CallLeaf()->_name != NULL &&
- (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 ||
- strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
- ) {
+ (call->as_CallLeaf()->_name != NULL &&
+ (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0)
+ ))) {
call->dump();
- assert(false, "EA: unexpected CallLeaf");
+ fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
}
#endif
// Always process arraycopy's destination object since
@@ -1080,7 +1084,7 @@ bool ConnectionGraph::complete_connection_graph(
C->log()->text("%s", (iterations >= CG_BUILD_ITER_LIMIT) ? "iterations" : "time");
C->log()->end_elem(" limit'");
}
- assert(false, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
+ assert(ExitEscapeAnalysisOnTimeout, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
time.seconds(), iterations, nodes_size(), ptnodes_worklist.length()));
// Possible infinite build_connection_graph loop,
// bailout (no changes to ideal graph were made).
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
index f34df79b4..6b90061ff 100644
--- a/src/share/vm/opto/library_call.cpp
+++ b/src/share/vm/opto/library_call.cpp
@@ -44,18 +44,22 @@ class LibraryIntrinsic : public InlineCallGenerator {
public:
private:
bool _is_virtual;
+ bool _is_predicted;
vmIntrinsics::ID _intrinsic_id;
public:
- LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id)
+ LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, vmIntrinsics::ID id)
: InlineCallGenerator(m),
_is_virtual(is_virtual),
+ _is_predicted(is_predicted),
_intrinsic_id(id)
{
}
virtual bool is_intrinsic() const { return true; }
virtual bool is_virtual() const { return _is_virtual; }
+ virtual bool is_predicted() const { return _is_predicted; }
virtual JVMState* generate(JVMState* jvms);
+ virtual Node* generate_predicate(JVMState* jvms);
vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
};
@@ -83,6 +87,7 @@ class LibraryCallKit : public GraphKit {
int arg_size() const { return callee()->arg_size(); }
bool try_to_inline();
+ Node* try_to_predicate();
// Helper functions to inline natives
void push_result(RegionNode* region, PhiNode* value);
@@ -148,6 +153,7 @@ class LibraryCallKit : public GraphKit {
CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, true, false);
}
+ Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static);
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
Node* make_string_method_node(int opcode, Node* str1, Node* str2);
@@ -253,6 +259,10 @@ class LibraryCallKit : public GraphKit {
bool inline_reverseBytes(vmIntrinsics::ID id);
bool inline_reference_get();
+ bool inline_aescrypt_Block(vmIntrinsics::ID id);
+ bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
+ Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
+ Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
};
@@ -306,6 +316,8 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
}
}
+ bool is_predicted = false;
+
switch (id) {
case vmIntrinsics::_compareTo:
if (!SpecialStringCompareTo) return NULL;
@@ -413,6 +425,18 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
break;
#endif
+ case vmIntrinsics::_aescrypt_encryptBlock:
+ case vmIntrinsics::_aescrypt_decryptBlock:
+ if (!UseAESIntrinsics) return NULL;
+ break;
+
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ if (!UseAESIntrinsics) return NULL;
+ // these two require the predicated logic
+ is_predicted = true;
+ break;
+
default:
assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -444,7 +468,7 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
if (!InlineUnsafeOps) return NULL;
}
- return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id);
+ return new LibraryIntrinsic(m, is_virtual, is_predicted, (vmIntrinsics::ID) id);
}
//----------------------register_library_intrinsics-----------------------
@@ -496,6 +520,47 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
return NULL;
}
+// Builds the predicate for a predicted intrinsic by delegating to
+// LibraryCallKit::try_to_predicate().  When the kit is not failing, the node
+// returned by the kit (possibly NULL) is returned and, if a compile log is
+// present, a "predicate_intrinsic" element is logged.  Otherwise the bailout
+// is optionally printed, an _intrinsic_failed statistic is recorded, and
+// NULL is returned.
+Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
+ LibraryCallKit kit(jvms, this);
+ Compile* C = kit.C;
+ // Value of C->unique() before the predicate is built; the difference is
+ // reported as nodes='...' in the compile log below.
+ int nodes = C->unique();
+#ifndef PRODUCT
+ assert(is_predicted(), "sanity");
+ if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) {
+ char buf[1000];
+ const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
+ tty->print_cr("Predicate for intrinsic %s", str);
+ }
+#endif
+
+ Node* slow_ctl = kit.try_to_predicate();
+ if (!kit.failing()) {
+ if (C->log()) {
+ C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " virtual='1'" : ""),
+ C->unique() - nodes);
+ }
+ return slow_ctl; // Could be NULL if the check folds.
+ }
+
+ // The intrinsic bailed out
+ if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
+ if (jvms->has_method()) {
+ // Not a root compile.
+ const char* msg = "failed to generate predicate for intrinsic";
+ CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg);
+ } else {
+ // Root compile
+ // NOTE(review): the message ends with " in" and nothing is printed after
+ // it in this function -- confirm whether a method name was meant to follow.
+ tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " (virtual)" : ""), kit.bci());
+ }
+ }
+ C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
+ return NULL;
+}
+
bool LibraryCallKit::try_to_inline() {
// Handle symbolic names for otherwise undistinguished boolean switches:
const bool is_store = true;
@@ -767,6 +832,14 @@ bool LibraryCallKit::try_to_inline() {
case vmIntrinsics::_Reference_get:
return inline_reference_get();
+ case vmIntrinsics::_aescrypt_encryptBlock:
+ case vmIntrinsics::_aescrypt_decryptBlock:
+ return inline_aescrypt_Block(intrinsic_id());
+
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
+
default:
// If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here.
@@ -780,6 +853,36 @@ bool LibraryCallKit::try_to_inline() {
}
}
+// Dispatches on intrinsic_id() to the routine that builds the predicate for a
+// predicted intrinsic and returns the node that routine produces.  For an id
+// without a predicate implementation, the current control is returned and
+// this kit's control is set to top().
+Node* LibraryCallKit::try_to_predicate() {
+ if (!jvms()->has_method()) {
+ // Root JVMState has a null method.
+ assert(map()->memory()->Opcode() == Op_Parm, "");
+ // Insert the memory aliasing node
+ set_all_memory(reset_memory());
+ }
+ assert(merged_memory(), "");
+
+ switch (intrinsic_id()) {
+ // The boolean argument is the 'decrypting' flag of the predicate builder.
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ return inline_cipherBlockChaining_AESCrypt_predicate(false);
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ return inline_cipherBlockChaining_AESCrypt_predicate(true);
+
+ default:
+ // If you get here, it may be that someone has added a new intrinsic
+ // to the list in vmSymbols.hpp without implementing it here.
+#ifndef PRODUCT
+ if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
+ tty->print_cr("*** Warning: Unimplemented predicate for intrinsic %s(%d)",
+ vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
+ }
+#endif
+ // Hand the whole current control to the caller as the slow path.
+ Node* slow_ctl = control();
+ set_control(top()); // No fast path intrinsic
+ return slow_ctl;
+ }
+}
+
//------------------------------push_result------------------------------
// Helper function for finishing intrinsics.
void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) {
@@ -3830,7 +3933,7 @@ Node* LibraryCallKit::generate_virtual_guard(Node* obj_klass,
vtable_index*vtableEntry::size()) * wordSize +
vtableEntry::method_offset_in_bytes();
Node* entry_addr = basic_plus_adr(obj_klass, entry_offset);
- Node* target_call = make_load(NULL, entry_addr, TypeInstPtr::NOTNULL, T_OBJECT);
+ Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS);
// Compare the target method with the expected method (e.g., Object.hashCode).
const TypePtr* native_call_addr = TypeMetadataPtr::make(method);
@@ -5613,3 +5716,265 @@ bool LibraryCallKit::inline_reference_get() {
push(result);
return true;
}
+
+
+// Emit a load of the field named 'fieldName' with signature 'fieldTypeString'
+// from the object 'fromObj' and return the load node.
+//   is_exact  - when true, asserts that the static type of fromObj is exact
+//   is_static - forwarded to the field lookup
+// Returns NULL when the field lookup on fromObj's klass yields no field, so
+// callers can fall back to the non-intrinsic path.
+// NOTE(review): the load type is built with TypeOopPtr::make_from_klass(), so
+// this appears to expect a reference-typed (object or array) field -- confirm
+// before using it for primitive fields.
+Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,
+                                             bool is_exact=true, bool is_static=false) {
+
+  const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr();
+  assert(tinst != NULL, "obj is null");
+  assert(tinst->klass()->is_loaded(), "obj is not loaded");
+  assert(!is_exact || tinst->klass_is_exact(), "klass not exact");
+
+  ciField* field = tinst->klass()->as_instance_klass()->get_field_by_name(ciSymbol::make(fieldName),
+                                                                          ciSymbol::make(fieldTypeString),
+                                                                          is_static);
+  // A missing field is reported to the caller rather than asserted on.
+  if (field == NULL) return (Node *) NULL;
+
+  // Next code copied from Parse::do_get_xxx():
+
+  // Compute address and memory type.
+  int offset = field->offset_in_bytes();
+  bool is_vol = field->is_volatile();
+  ciType* field_klass = field->type();
+  assert(field_klass->is_loaded(), "should be loaded");
+  const TypePtr* adr_type = C->alias_type(field)->adr_type();
+  Node *adr = basic_plus_adr(fromObj, fromObj, offset);
+  BasicType bt = field->layout_type();
+
+  // Build the resultant type of the load
+  const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+
+  // Build the load.
+  Node* loadedField = make_load(NULL, adr, type, bt, adr_type, is_vol);
+  return loadedField;
+}
+
+
+//------------------------------inline_aescrypt_Block-----------------------
+// Intrinsic for the single-block AESCrypt encrypt/decrypt methods: emits a
+// leaf call to the matching StubRoutines stub, passing the start addresses of
+// the source bytes, the destination bytes and the expanded key array.
+// Returns false when no stub address is available for 'id' or when the key
+// array cannot be located on the receiver; returns true once the call has
+// been generated.
+bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
+  // Both are initialized so that an id not handled by the switch below reaches
+  // the NULL check with a defined value instead of an indeterminate one.
+  address stubAddr = NULL;
+  const char *stubName = NULL;
+  assert(UseAES, "need AES instruction support");
+
+  switch(id) {
+  case vmIntrinsics::_aescrypt_encryptBlock:
+    stubAddr = StubRoutines::aescrypt_encryptBlock();
+    stubName = "aescrypt_encryptBlock";
+    break;
+  case vmIntrinsics::_aescrypt_decryptBlock:
+    stubAddr = StubRoutines::aescrypt_decryptBlock();
+    stubName = "aescrypt_decryptBlock";
+    break;
+  default:
+    break;  // no stub for this id; rejected by the NULL check below
+  }
+  if (stubAddr == NULL) return false;
+
+  // Restore the stack and pop off the arguments.
+  int nargs = 5;  // this + 2 oop/offset combos
+  assert(callee()->signature()->size() == nargs-1, "encryptBlock has 4 arguments");
+
+  Node *aescrypt_object = argument(0);
+  Node *src             = argument(1);
+  Node *src_offset      = argument(2);
+  Node *dest            = argument(3);
+  Node *dest_offset     = argument(4);
+
+  // (1) src and dest are arrays.
+  const Type* src_type = src->Value(&_gvn);
+  const Type* dest_type = dest->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+  assert (top_src  != NULL && top_src->klass()  != NULL &&  top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+  // for the quick and dirty code we will skip all the checks.
+  // we are just trying to get the call to be generated.
+  Node* src_start  = src;
+  Node* dest_start = dest;
+  if (src_offset != NULL || dest_offset != NULL) {
+    assert(src_offset != NULL && dest_offset != NULL, "");
+    // Turn (array, offset) pairs into byte element addresses for the stub.
+    src_start  = array_element_address(src,  src_offset,  T_BYTE);
+    dest_start = array_element_address(dest, dest_offset, T_BYTE);
+  }
+
+  // now need to get the start of its expanded key array
+  // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
+  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+  if (k_start == NULL) return false;
+
+  // Call the stub.
+  make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
+                    stubAddr, stubName, TypePtr::BOTTOM,
+                    src_start, dest_start, k_start);
+
+  return true;
+}
+
+//------------------------------inline_cipherBlockChaining_AESCrypt-----------------------
+// Intrinsic for the CipherBlockChaining encrypt/decrypt methods when the
+// embedded cipher is an AESCrypt: emits a leaf call to the matching
+// StubRoutines stub with the start addresses of the source bytes, destination
+// bytes, expanded key array and 'r' vector, plus the length argument.
+// Returns false when no stub address is available for 'id', when a required
+// field cannot be found, or when the AESCrypt class is not loaded; returns
+// true once the call has been generated.
+bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
+  // Both are initialized so that an id not handled by the switch below reaches
+  // the NULL check with a defined value instead of an indeterminate one.
+  address stubAddr = NULL;
+  const char *stubName = NULL;
+
+  assert(UseAES, "need AES instruction support");
+
+  switch(id) {
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+    stubAddr = StubRoutines::cipherBlockChaining_encryptAESCrypt();
+    stubName = "cipherBlockChaining_encryptAESCrypt";
+    break;
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    stubAddr = StubRoutines::cipherBlockChaining_decryptAESCrypt();
+    stubName = "cipherBlockChaining_decryptAESCrypt";
+    break;
+  default:
+    break;  // no stub for this id; rejected by the NULL check below
+  }
+  if (stubAddr == NULL) return false;
+
+
+  // Restore the stack and pop off the arguments.
+  int nargs = 6;  // this + oop/offset + len + oop/offset
+  assert(callee()->signature()->size() == nargs-1, "wrong number of arguments");
+  Node *cipherBlockChaining_object = argument(0);
+  Node *src                        = argument(1);
+  Node *src_offset                 = argument(2);
+  Node *len                        = argument(3);
+  Node *dest                       = argument(4);
+  Node *dest_offset                = argument(5);
+
+  // (1) src and dest are arrays.
+  const Type* src_type = src->Value(&_gvn);
+  const Type* dest_type = dest->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+  assert (top_src  != NULL && top_src->klass()  != NULL
+          &&  top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+  // checks are the responsibility of the caller
+  Node* src_start  = src;
+  Node* dest_start = dest;
+  if (src_offset != NULL || dest_offset != NULL) {
+    assert(src_offset != NULL && dest_offset != NULL, "");
+    // Turn (array, offset) pairs into byte element addresses for the stub.
+    src_start  = array_element_address(src,  src_offset,  T_BYTE);
+    dest_start = array_element_address(dest, dest_offset, T_BYTE);
+  }
+
+  // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object
+  // (because of the predicated logic executed earlier).
+  // so we cast it here safely.
+
+  Node* embeddedCipherObj = load_field_from_object(cipherBlockChaining_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+  if (embeddedCipherObj == NULL) return false;
+
+  // cast it to what we know it will be at runtime
+  const TypeInstPtr* tinst = _gvn.type(cipherBlockChaining_object)->isa_instptr();
+  assert(tinst != NULL, "CBC obj is null");
+  assert(tinst->klass()->is_loaded(), "CBC obj is not loaded");
+  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+  if (!klass_AESCrypt->is_loaded()) return false;
+
+  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
+  const TypeOopPtr* xtype = aklass->as_instance_type();
+  Node* aescrypt_object = new(C) CheckCastPPNode(control(), embeddedCipherObj, xtype);
+  aescrypt_object = _gvn.transform(aescrypt_object);
+
+  // we need to get the start of the aescrypt_object's expanded key array
+  // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
+  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+  if (k_start == NULL) return false;
+
+  // similarly, get the start address of the r vector
+  Node* objRvec = load_field_from_object(cipherBlockChaining_object, "r", "[B", /*is_exact*/ false);
+  if (objRvec == NULL) return false;
+  Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE);
+
+  // Call the stub, passing src_start, dest_start, k_start, r_start and src_len
+  make_runtime_call(RC_LEAF|RC_NO_FP,
+                    OptoRuntime::cipherBlockChaining_aescrypt_Type(),
+                    stubAddr, stubName, TypePtr::BOTTOM,
+                    src_start, dest_start, k_start, r_start, len);
+
+  // return is void so no result needs to be pushed
+
+  return true;
+}
+
+//------------------------------get_key_start_from_aescrypt_object-----------------------
+// Load the int[] field "K" of the given AESCrypt object and return the address
+// of its element 0. Returns NULL when the field load yields nothing (debug
+// builds assert in that case).
+Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
+  Node* key_array = load_field_from_object(aescrypt_object, "K", "[I", /*is_exact*/ false);
+  assert (key_array != NULL, "wrong version of com.sun.crypto.provider.AESCrypt");
+  if (key_array != NULL) {
+    // have the array; hand back the address of K[0]
+    return array_element_address(key_array, intcon(0), T_INT);
+  }
+  return (Node *) NULL;
+}
+
+//----------------------------inline_cipherBlockChaining_AESCrypt_predicate----------------------------
+// Return node representing slow path of predicate check.
+// the pseudo code we want to emulate with this predicate is:
+// for encryption:
+//    if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
+// for decryption:
+//    if ((embeddedCipherObj instanceof AESCrypt) && (cipher!=plain)) do_intrinsic, else do_javapath
+//    note cipher==plain is more conservative than the original java code but that's OK
+//
+// decrypting - selects the decryption variant, which adds the src != dest test.
+// Returns NULL when the receiver null check leaves the kit stopped. When the
+// embeddedCipher field cannot be found or AESCrypt is not loaded, the current
+// control is returned and control is set to top (no intrinsic fast path).
+// For encryption the result of generate_guard() is returned as is and may be
+// NULL.
+Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) {
+  // First, check receiver for NULL since it is a virtual method.
+  int nargs = arg_size();
+  Node* objCBC = argument(0);
+  _sp += nargs;
+  objCBC = do_null_check(objCBC, T_OBJECT);
+  _sp -= nargs;
+
+  if (stopped()) return NULL; // Always NULL
+
+  // Load embeddedCipher field of CipherBlockChaining object.
+  Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+
+  // get AESCrypt klass for instanceOf check
+  // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
+  // will have same classloader as CipherBlockChaining object
+  const TypeInstPtr* tinst = _gvn.type(objCBC)->isa_instptr();
+  assert(tinst != NULL, "CBCobj is null");
+  assert(tinst->klass()->is_loaded(), "CBCobj is not loaded");
+
+  // we want to do an instanceof comparison against the AESCrypt class
+  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+  if (embeddedCipherObj == NULL || !klass_AESCrypt->is_loaded()) {
+    // if the embeddedCipher field could not be found (load_field_from_object
+    // returned NULL), or AESCrypt is not even loaded, we never take the
+    // intrinsic fast path
+    Node* ctrl = control();
+    set_control(top()); // no regular fast path
+    return ctrl;
+  }
+  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+
+  _sp += nargs;          // gen_instanceof might do an uncommon trap
+  Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
+  _sp -= nargs;
+  Node* cmp_instof  = _gvn.transform(new (C) CmpINode(instof, intcon(1)));
+  Node* bool_instof  = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne));
+
+  // Guard taken when the instanceof result is not 1.
+  Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
+
+  // for encryption, we are done
+  if (!decrypting)
+    return instof_false;  // even if it is NULL
+
+  // for decryption, we need to add a further check to avoid
+  // taking the intrinsic path when cipher and plain are the same
+  // see the original java code for why.
+  RegionNode* region = new(C) RegionNode(3);
+  region->init_req(1, instof_false);
+  Node* src = argument(1);
+  Node *dest = argument(4);
+  Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest));
+  Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq));
+  Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN);
+  region->init_req(2, src_dest_conjoint);
+
+  // The merged slow path: either guard above was taken.
+  record_for_igvn(region);
+  return _gvn.transform(region);
+
+}
+
+
diff --git a/src/share/vm/opto/mulnode.cpp b/src/share/vm/opto/mulnode.cpp
index 4572a265e..4047b933f 100644
--- a/src/share/vm/opto/mulnode.cpp
+++ b/src/share/vm/opto/mulnode.cpp
@@ -479,24 +479,27 @@ Node *AndINode::Ideal(PhaseGVN *phase, bool can_reshape) {
return new (phase->C) AndINode(load,phase->intcon(mask&0xFFFF));
// Masking bits off of a Short? Loading a Character does some masking
- if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
- Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
- load->in(MemNode::Memory),
- load->in(MemNode::Address),
- load->adr_type());
- ldus = phase->transform(ldus);
- return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
- }
+ if (can_reshape &&
+ load->outcnt() == 1 && load->unique_out() == this) {
+ if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
+ Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
+ load->in(MemNode::Memory),
+ load->in(MemNode::Address),
+ load->adr_type());
+ ldus = phase->transform(ldus);
+ return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
+ }
- // Masking sign bits off of a Byte? Do an unsigned byte load plus
- // an and.
- if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
- Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
- load->in(MemNode::Memory),
- load->in(MemNode::Address),
- load->adr_type());
- ldub = phase->transform(ldub);
- return new (phase->C) AndINode(ldub, phase->intcon(mask));
+ // Masking sign bits off of a Byte? Do an unsigned byte load plus
+ // an and.
+ if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
+ Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
+ load->in(MemNode::Memory),
+ load->in(MemNode::Address),
+ load->adr_type());
+ ldub = phase->transform(ldub);
+ return new (phase->C) AndINode(ldub, phase->intcon(mask));
+ }
}
// Masking off sign bits? Dont make them!
@@ -923,7 +926,9 @@ Node *RShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
set_req(2, phase->intcon(0));
return this;
}
- else if( ld->Opcode() == Op_LoadUS )
+ else if( can_reshape &&
+ ld->Opcode() == Op_LoadUS &&
+ ld->outcnt() == 1 && ld->unique_out() == shl)
// Replace zero-extension-load with sign-extension-load
return new (phase->C) LoadSNode( ld->in(MemNode::Control),
ld->in(MemNode::Memory),
diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp
index bb050533d..51987e25e 100644
--- a/src/share/vm/opto/runtime.cpp
+++ b/src/share/vm/opto/runtime.cpp
@@ -811,6 +811,48 @@ const TypeFunc* OptoRuntime::array_fill_Type() {
return TypeFunc::make(domain, range);
}
+// Signature of the aescrypt encrypt/decrypt block stubs:
+// (src, dest, k array) -> void, all three arguments being non-null pointers.
+const TypeFunc* OptoRuntime::aescrypt_block_Type() {
+  // domain (inputs)
+  const int argcnt = 3;
+  const Type** domain_fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  domain_fields[argp++] = TypePtr::NOTNULL;    // src
+  domain_fields[argp++] = TypePtr::NOTNULL;    // dest
+  domain_fields[argp++] = TypePtr::NOTNULL;    // k array
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, domain_fields);
+
+  // range (result): void, so the tuple carries no value slot
+  const Type** range_fields = TypeTuple::fields(1);
+  range_fields[TypeFunc::Parms+0] = NULL; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, range_fields);
+
+  return TypeFunc::make(domain, range);
+}
+
+// Signature of the cipherBlockChaining aescrypt encrypt/decrypt stubs:
+// (src, dest, k array, r array, src len) -> void, i.e. four non-null pointers
+// followed by an int length.
+const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
+  // domain (inputs)
+  const int argcnt = 5;
+  const Type** domain_fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  domain_fields[argp++] = TypePtr::NOTNULL;    // src
+  domain_fields[argp++] = TypePtr::NOTNULL;    // dest
+  domain_fields[argp++] = TypePtr::NOTNULL;    // k array
+  domain_fields[argp++] = TypePtr::NOTNULL;    // r array
+  domain_fields[argp++] = TypeInt::INT;        // src len
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, domain_fields);
+
+  // range (result): void, so the tuple carries no value slot
+  const Type** range_fields = TypeTuple::fields(1);
+  range_fields[TypeFunc::Parms+0] = NULL; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, range_fields);
+
+  return TypeFunc::make(domain, range);
+}
+
//------------- Interpreter state access for on stack replacement
const TypeFunc* OptoRuntime::osr_end_Type() {
// create input type (domain)
diff --git a/src/share/vm/opto/runtime.hpp b/src/share/vm/opto/runtime.hpp
index c70777267..13da255b7 100644
--- a/src/share/vm/opto/runtime.hpp
+++ b/src/share/vm/opto/runtime.hpp
@@ -280,6 +280,9 @@ private:
static const TypeFunc* array_fill_Type();
+ static const TypeFunc* aescrypt_block_Type();
+ static const TypeFunc* cipherBlockChaining_aescrypt_Type();
+
// leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type();
diff --git a/src/share/vm/opto/superword.cpp b/src/share/vm/opto/superword.cpp
index ffc5394bb..f53c64837 100644
--- a/src/share/vm/opto/superword.cpp
+++ b/src/share/vm/opto/superword.cpp
@@ -1776,16 +1776,15 @@ void SuperWord::compute_vector_element_type() {
set_velt_type(n, container_type(n));
}
- // Propagate narrowed type backwards through operations
+ // Propagate integer narrowed type backwards through operations
// that don't depend on higher order bits
for (int i = _block.length() - 1; i >= 0; i--) {
Node* n = _block.at(i);
// Only integer types need be examined
- const Type* vt = velt_type(n);
- if (vt->basic_type() == T_INT) {
+ const Type* vtn = velt_type(n);
+ if (vtn->basic_type() == T_INT) {
uint start, end;
VectorNode::vector_operands(n, &start, &end);
- const Type* vt = velt_type(n);
for (uint j = start; j < end; j++) {
Node* in = n->in(j);
@@ -1801,6 +1800,24 @@ void SuperWord::compute_vector_element_type() {
}
}
if (same_type) {
+ // For right shifts of small integer types (bool, byte, char, short)
+ // we need precise information about sign-ness. Only Load nodes have
+ // this information because Store nodes are the same for signed and
+ // unsigned values. And any arithmetic operation after a load may
+ // expand a value to signed Int so such right shifts can't be used
+ // because vector elements do not have upper bits of Int.
+ const Type* vt = vtn;
+ if (VectorNode::is_shift(in)) {
+ Node* load = in->in(1);
+ if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) {
+ vt = velt_type(load);
+ } else if (in->Opcode() != Op_LShiftI) {
+ // Widen type to Int to avoid creation of right shift vector
+ // (align + data_size(s1) check in stmts_can_pack() will fail).
+ // Note, left shifts work regardless type.
+ vt = TypeInt::INT;
+ }
+ }
set_velt_type(in, vt);
}
}
@@ -1841,7 +1858,20 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
// Smallest type containing range of values
const Type* SuperWord::container_type(Node* n) {
if (n->is_Mem()) {
- return Type::get_const_basic_type(n->as_Mem()->memory_type());
+ BasicType bt = n->as_Mem()->memory_type();
+ if (n->is_Store() && (bt == T_CHAR)) {
+ // Use T_SHORT type instead of T_CHAR for stored values because any
+ // preceding arithmetic operation extends values to signed Int.
+ bt = T_SHORT;
+ }
+ if (n->Opcode() == Op_LoadUB) {
+ // Adjust type for unsigned byte loads, it is important for right shifts.
+ // T_BOOLEAN is used because there is no basic type representing type
+ // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only
+ // size (one byte) and sign is important.
+ bt = T_BOOLEAN;
+ }
+ return Type::get_const_basic_type(bt);
}
const Type* t = _igvn.type(n);
if (t->basic_type() == T_INT) {
diff --git a/src/share/vm/opto/type.cpp b/src/share/vm/opto/type.cpp
index f982799f6..1a8ee2597 100644
--- a/src/share/vm/opto/type.cpp
+++ b/src/share/vm/opto/type.cpp
@@ -61,7 +61,7 @@ Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ILLEGAL, "tuple:", false, Node::NotAMachineReg, relocInfo::none }, // Tuple
{ Bad, T_ARRAY, "array:", false, Node::NotAMachineReg, relocInfo::none }, // Array
-#if defined(IA32) || defined(AMD64)
+#ifndef SPARC
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
diff --git a/src/share/vm/opto/vectornode.cpp b/src/share/vm/opto/vectornode.cpp
index d0955a819..9660d4ed8 100644
--- a/src/share/vm/opto/vectornode.cpp
+++ b/src/share/vm/opto/vectornode.cpp
@@ -29,8 +29,7 @@
//------------------------------VectorNode--------------------------------------
// Return the vector operator for the specified scalar operation
-// and vector length. Also used to check if the code generator
-// supports the vector operation.
+// and vector length.
int VectorNode::opcode(int sopc, BasicType bt) {
switch (sopc) {
case Op_AddI:
@@ -75,7 +74,7 @@ int VectorNode::opcode(int sopc, BasicType bt) {
case T_BYTE: return 0; // Unimplemented
case T_CHAR:
case T_SHORT: return Op_MulVS;
- case T_INT: return Matcher::match_rule_supported(Op_MulVI) ? Op_MulVI : 0; // SSE4_1
+ case T_INT: return Op_MulVI;
}
ShouldNotReachHere();
case Op_MulF:
@@ -104,9 +103,9 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return Op_LShiftVL;
case Op_RShiftI:
switch (bt) {
- case T_BOOLEAN:
+ case T_BOOLEAN:return Op_URShiftVB; // boolean is unsigned value
+ case T_CHAR: return Op_URShiftVS; // char is unsigned value
case T_BYTE: return Op_RShiftVB;
- case T_CHAR:
case T_SHORT: return Op_RShiftVS;
case T_INT: return Op_RShiftVI;
}
@@ -116,10 +115,14 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return Op_RShiftVL;
case Op_URShiftI:
switch (bt) {
- case T_BOOLEAN:
- case T_BYTE: return Op_URShiftVB;
- case T_CHAR:
- case T_SHORT: return Op_URShiftVS;
+ case T_BOOLEAN:return Op_URShiftVB;
+ case T_CHAR: return Op_URShiftVS;
+ case T_BYTE:
+ case T_SHORT: return 0; // Vector logical right shift for signed short
+ // values produces incorrect Java result for
+ // negative data because java code should convert
+ // a short value into int value with sign
+ // extension before a shift.
case T_INT: return Op_URShiftVI;
}
ShouldNotReachHere();
@@ -157,12 +160,14 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return 0; // Unimplemented
}
+// Also used to check if the code generator
+// supports the vector operation.
bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
if (is_java_primitive(bt) &&
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
int vopc = VectorNode::opcode(opc, bt);
- return vopc > 0 && Matcher::has_match_rule(vopc);
+ return vopc > 0 && Matcher::match_rule_supported(vopc);
}
return false;
}
diff --git a/src/share/vm/prims/unsafe.cpp b/src/share/vm/prims/unsafe.cpp
index 051c85975..e3750e66f 100644
--- a/src/share/vm/prims/unsafe.cpp
+++ b/src/share/vm/prims/unsafe.cpp
@@ -124,6 +124,8 @@ inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) {
assert((void*)p->obj_field_addr<oop>((jint)byte_offset) == ptr_plus_disp,
"raw [ptr+disp] must be consistent with oop::field_base");
}
+ jlong p_size = HeapWordSize * (jlong)(p->size());
+ assert(byte_offset < p_size, err_msg("Unsafe access: offset " INT64_FORMAT " > object's size " INT64_FORMAT, byte_offset, p_size));
}
#endif
if (sizeof(char*) == sizeof(jint)) // (this constant folds!)
diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp
index 0b55da2c0..12fbddc29 100644
--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -791,6 +791,10 @@ void Arguments::print_on(outputStream* st) {
st->print("jvm_args: "); print_jvm_args_on(st);
}
st->print_cr("java_command: %s", java_command() ? java_command() : "<unknown>");
+ if (_java_class_path != NULL) {
+ char* path = _java_class_path->value();
+ st->print_cr("java_class_path (initial): %s", strlen(path) == 0 ? "<not set>" : path );
+ }
st->print_cr("Launcher Type: %s", _sun_java_launcher);
}
@@ -2771,6 +2775,11 @@ SOLARIS_ONLY(
return JNI_EINVAL;
}
FLAG_SET_CMDLINE(uintx, MaxDirectMemorySize, max_direct_memory_size);
+ } else if (match_option(option, "-XX:+UseVMInterruptibleIO", &tail)) {
+ // NOTE! In JDK 9, the UseVMInterruptibleIO flag will completely go
+ // away and will cause VM initialization failures!
+ warning("-XX:+UseVMInterruptibleIO is obsolete and will be removed in a future release.");
+ FLAG_SET_CMDLINE(bool, UseVMInterruptibleIO, true);
} else if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
// Skip -XX:Flags= since that case has already been handled
if (strncmp(tail, "Flags=", strlen("Flags=")) != 0) {
@@ -2786,10 +2795,6 @@ SOLARIS_ONLY(
// Change the default value for flags which have different default values
// when working with older JDKs.
- if (JDK_Version::current().compare_major(6) <= 0 &&
- FLAG_IS_DEFAULT(UseVMInterruptibleIO)) {
- FLAG_SET_DEFAULT(UseVMInterruptibleIO, true);
- }
#ifdef LINUX
if (JDK_Version::current().compare_major(6) <= 0 &&
FLAG_IS_DEFAULT(UseLinuxPosixThreadCPUClocks)) {
diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp
index e783883eb..39e6a98ab 100644
--- a/src/share/vm/runtime/globals.hpp
+++ b/src/share/vm/runtime/globals.hpp
@@ -533,6 +533,9 @@ class CommandLineFlags {
product(intx, UseSSE, 99, \
"Highest supported SSE instructions set on x86/x64") \
\
+ product(bool, UseAES, false, \
+ "Control whether AES instructions can be used on x86/x64") \
+ \
product(uintx, LargePageSizeInBytes, 0, \
"Large page size (0 to let VM choose the page size") \
\
@@ -635,6 +638,9 @@ class CommandLineFlags {
product(bool, UseSSE42Intrinsics, false, \
"SSE4.2 versions of intrinsics") \
\
+ product(bool, UseAESIntrinsics, false, \
+ "use intrinsics for AES versions of crypto") \
+ \
develop(bool, TraceCallFixup, false, \
"traces all call fixups") \
\
diff --git a/src/share/vm/runtime/handles.cpp b/src/share/vm/runtime/handles.cpp
index cb53088ee..a37e46da1 100644
--- a/src/share/vm/runtime/handles.cpp
+++ b/src/share/vm/runtime/handles.cpp
@@ -158,13 +158,18 @@ HandleMark::~HandleMark() {
// Delete later chunks
if( _chunk->next() ) {
+ // reset arena size before delete chunks. Otherwise, the total
+ // arena size could exceed total chunk size
+ assert(area->size_in_bytes() > size_in_bytes(), "Sanity check");
+ area->set_size_in_bytes(size_in_bytes());
_chunk->next_chop();
+ } else {
+ assert(area->size_in_bytes() == size_in_bytes(), "Sanity check");
}
// Roll back arena to saved top markers
area->_chunk = _chunk;
area->_hwm = _hwm;
area->_max = _max;
- area->set_size_in_bytes(_size_in_bytes);
#ifdef ASSERT
// clear out first chunk (to detect allocation bugs)
if (ZapVMHandleArea) {
diff --git a/src/share/vm/runtime/handles.hpp b/src/share/vm/runtime/handles.hpp
index cab3dc581..8c643d7c2 100644
--- a/src/share/vm/runtime/handles.hpp
+++ b/src/share/vm/runtime/handles.hpp
@@ -297,6 +297,7 @@ class HandleMark {
void set_previous_handle_mark(HandleMark* mark) { _previous_handle_mark = mark; }
HandleMark* previous_handle_mark() const { return _previous_handle_mark; }
+ size_t size_in_bytes() const { return _size_in_bytes; }
public:
HandleMark(); // see handles_inline.hpp
HandleMark(Thread* thread) { initialize(thread); }
diff --git a/src/share/vm/runtime/handles.inline.hpp b/src/share/vm/runtime/handles.inline.hpp
index 96dd94cc5..51c31ff4b 100644
--- a/src/share/vm/runtime/handles.inline.hpp
+++ b/src/share/vm/runtime/handles.inline.hpp
@@ -136,13 +136,18 @@ inline void HandleMark::pop_and_restore() {
HandleArea* area = _area; // help compilers with poor alias analysis
// Delete later chunks
if( _chunk->next() ) {
+ // reset arena size before delete chunks. Otherwise, the total
+ // arena size could exceed total chunk size
+ assert(area->size_in_bytes() > size_in_bytes(), "Sanity check");
+ area->set_size_in_bytes(size_in_bytes());
_chunk->next_chop();
+ } else {
+ assert(area->size_in_bytes() == size_in_bytes(), "Sanity check");
}
// Roll back arena to saved top markers
area->_chunk = _chunk;
area->_hwm = _hwm;
area->_max = _max;
- area->set_size_in_bytes(_size_in_bytes);
debug_only(area->_handle_mark_nesting--);
}
diff --git a/src/share/vm/runtime/os.cpp b/src/share/vm/runtime/os.cpp
index 550e64a27..56bd82613 100644
--- a/src/share/vm/runtime/os.cpp
+++ b/src/share/vm/runtime/os.cpp
@@ -600,9 +600,7 @@ void* os::malloc(size_t size, MEMFLAGS memflags, address caller) {
if (PrintMalloc && tty != NULL) tty->print_cr("os::malloc " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, memblock);
// we do not track MallocCushion memory
- if (MemTracker::is_on()) {
MemTracker::record_malloc((address)memblock, size, memflags, caller == 0 ? CALLER_PC : caller);
- }
return memblock;
}
@@ -613,7 +611,7 @@ void* os::realloc(void *memblock, size_t size, MEMFLAGS memflags, address caller
NOT_PRODUCT(inc_stat_counter(&num_mallocs, 1));
NOT_PRODUCT(inc_stat_counter(&alloc_bytes, size));
void* ptr = ::realloc(memblock, size);
- if (ptr != NULL && MemTracker::is_on()) {
+ if (ptr != NULL) {
MemTracker::record_realloc((address)memblock, (address)ptr, size, memflags,
caller == 0 ? CALLER_PC : caller);
}
@@ -1401,7 +1399,7 @@ bool os::create_stack_guard_pages(char* addr, size_t bytes) {
char* os::reserve_memory(size_t bytes, char* addr, size_t alignment_hint) {
char* result = pd_reserve_memory(bytes, addr, alignment_hint);
- if (result != NULL && MemTracker::is_on()) {
+ if (result != NULL) {
MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
}
@@ -1409,7 +1407,7 @@ char* os::reserve_memory(size_t bytes, char* addr, size_t alignment_hint) {
}
char* os::attempt_reserve_memory_at(size_t bytes, char* addr) {
char* result = pd_attempt_reserve_memory_at(bytes, addr);
- if (result != NULL && MemTracker::is_on()) {
+ if (result != NULL) {
MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
}
return result;
@@ -1422,7 +1420,7 @@ void os::split_reserved_memory(char *base, size_t size,
bool os::commit_memory(char* addr, size_t bytes, bool executable) {
bool res = pd_commit_memory(addr, bytes, executable);
- if (res && MemTracker::is_on()) {
+ if (res) {
MemTracker::record_virtual_memory_commit((address)addr, bytes, CALLER_PC);
}
return res;
@@ -1431,7 +1429,7 @@ bool os::commit_memory(char* addr, size_t bytes, bool executable) {
bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
bool executable) {
bool res = os::pd_commit_memory(addr, size, alignment_hint, executable);
- if (res && MemTracker::is_on()) {
+ if (res) {
MemTracker::record_virtual_memory_commit((address)addr, size, CALLER_PC);
}
return res;
@@ -1458,8 +1456,9 @@ char* os::map_memory(int fd, const char* file_name, size_t file_offset,
char *addr, size_t bytes, bool read_only,
bool allow_exec) {
char* result = pd_map_memory(fd, file_name, file_offset, addr, bytes, read_only, allow_exec);
- if (result != NULL && MemTracker::is_on()) {
+ if (result != NULL) {
MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
+ MemTracker::record_virtual_memory_commit((address)result, bytes, CALLER_PC);
}
return result;
}
@@ -1474,6 +1473,7 @@ char* os::remap_memory(int fd, const char* file_name, size_t file_offset,
bool os::unmap_memory(char *addr, size_t bytes) {
bool result = pd_unmap_memory(addr, bytes);
if (result) {
+ MemTracker::record_virtual_memory_uncommit((address)addr, bytes);
MemTracker::record_virtual_memory_release((address)addr, bytes);
}
return result;
diff --git a/src/share/vm/runtime/stubRoutines.cpp b/src/share/vm/runtime/stubRoutines.cpp
index 5ca4ba599..98d428abd 100644
--- a/src/share/vm/runtime/stubRoutines.cpp
+++ b/src/share/vm/runtime/stubRoutines.cpp
@@ -120,6 +120,10 @@ address StubRoutines::_arrayof_jbyte_fill;
address StubRoutines::_arrayof_jshort_fill;
address StubRoutines::_arrayof_jint_fill;
+address StubRoutines::_aescrypt_encryptBlock = NULL;
+address StubRoutines::_aescrypt_decryptBlock = NULL;
+address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
+address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
double (* StubRoutines::_intrinsic_log )(double) = NULL;
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
diff --git a/src/share/vm/runtime/stubRoutines.hpp b/src/share/vm/runtime/stubRoutines.hpp
index 0e583aea0..91f273e65 100644
--- a/src/share/vm/runtime/stubRoutines.hpp
+++ b/src/share/vm/runtime/stubRoutines.hpp
@@ -199,6 +199,11 @@ class StubRoutines: AllStatic {
// zero heap space aligned to jlong (8 bytes)
static address _zero_aligned_words;
+ static address _aescrypt_encryptBlock;
+ static address _aescrypt_decryptBlock;
+ static address _cipherBlockChaining_encryptAESCrypt;
+ static address _cipherBlockChaining_decryptAESCrypt;
+
// These are versions of the java.lang.Math methods which perform
// the same operations as the intrinsic version. They are used for
// constant folding in the compiler to ensure equivalence. If the
@@ -330,6 +335,11 @@ class StubRoutines: AllStatic {
static address arrayof_jshort_fill() { return _arrayof_jshort_fill; }
static address arrayof_jint_fill() { return _arrayof_jint_fill; }
+ static address aescrypt_encryptBlock() { return _aescrypt_encryptBlock; }
+ static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; }
+ static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
+ static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
+
static address select_fill_function(BasicType t, bool aligned, const char* &name);
static address zero_aligned_words() { return _zero_aligned_words; }
diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp
index 0ed9a4b1f..5c96224a4 100644
--- a/src/share/vm/runtime/thread.cpp
+++ b/src/share/vm/runtime/thread.cpp
@@ -323,12 +323,10 @@ void Thread::record_stack_base_and_size() {
os::initialize_thread(this);
#if INCLUDE_NMT
- // record thread's native stack, stack grows downward
- if (MemTracker::is_on()) {
- address stack_low_addr = stack_base() - stack_size();
- MemTracker::record_thread_stack(stack_low_addr, stack_size(), this,
+ // record thread's native stack, stack grows downward
+ address stack_low_addr = stack_base() - stack_size();
+ MemTracker::record_thread_stack(stack_low_addr, stack_size(), this,
CURRENT_PC);
- }
#endif // INCLUDE_NMT
}
@@ -345,6 +343,9 @@ Thread::~Thread() {
if (_stack_base != NULL) {
address low_stack_addr = stack_base() - stack_size();
MemTracker::release_thread_stack(low_stack_addr, stack_size(), this);
+#ifdef ASSERT
+ set_stack_base(NULL);
+#endif
}
#endif // INCLUDE_NMT
@@ -1521,10 +1522,12 @@ JavaThread::~JavaThread() {
tty->print_cr("terminate thread %p", this);
}
- // Info NMT that this JavaThread is exiting, its memory
- // recorder should be collected
+ // By now, this thread should already be invisible to safepoint,
+ // and its per-thread recorder also collected.
assert(!is_safepoint_visible(), "wrong state");
- MemTracker::thread_exiting(this);
+#if INCLUDE_NMT
+ assert(get_recorder() == NULL, "Already collected");
+#endif // INCLUDE_NMT
// JSR166 -- return the parker to the free list
Parker::Release(_parker);
@@ -2425,6 +2428,7 @@ void JavaThread::create_stack_guard_pages() {
}
void JavaThread::remove_stack_guard_pages() {
+ assert(Thread::current() == this, "from different thread");
if (_stack_guard_state == stack_guard_unused) return;
address low_addr = stack_base() - stack_size();
size_t len = (StackYellowPages + StackRedPages) * os::vm_page_size();
@@ -4093,7 +4097,10 @@ void Threads::remove(JavaThread* p) {
// Now, this thread is not visible to safepoint
p->set_safepoint_visible(false);
-
+ // once the thread becomes safepoint invisible, we can not use its per-thread
+ // recorder. And Threads::do_threads() no longer walks this thread, so we have
+ // to release its per-thread recorder here.
+ MemTracker::thread_exiting(p);
} // unlock Threads_lock
// Since Events::log uses a lock, we grab it outside the Threads_lock
diff --git a/src/share/vm/runtime/vmStructs.cpp b/src/share/vm/runtime/vmStructs.cpp
index 51e3ab5f7..9159ad945 100644
--- a/src/share/vm/runtime/vmStructs.cpp
+++ b/src/share/vm/runtime/vmStructs.cpp
@@ -59,6 +59,7 @@
#include "memory/generation.hpp"
#include "memory/generationSpec.hpp"
#include "memory/heap.hpp"
+#include "memory/metablock.hpp"
#include "memory/space.hpp"
#include "memory/tenuredGeneration.hpp"
#include "memory/universe.hpp"
@@ -249,6 +250,7 @@ typedef TwoOopHashtable<Klass*, mtClass> KlassTwoOopHashtable;
typedef Hashtable<Klass*, mtClass> KlassHashtable;
typedef HashtableEntry<Klass*, mtClass> KlassHashtableEntry;
typedef TwoOopHashtable<Symbol*, mtClass> SymbolTwoOopHashtable;
+typedef BinaryTreeDictionary<Metablock, FreeList> MetablockTreeDictionary;
//--------------------------------------------------------------------------------
// VM_STRUCTS
@@ -1237,7 +1239,15 @@ typedef TwoOopHashtable<Symbol*, mtClass> SymbolTwoOopHashtable;
nonstatic_field(AccessFlags, _flags, jint) \
nonstatic_field(elapsedTimer, _counter, jlong) \
nonstatic_field(elapsedTimer, _active, bool) \
- nonstatic_field(InvocationCounter, _counter, unsigned int)
+ nonstatic_field(InvocationCounter, _counter, unsigned int) \
+ volatile_nonstatic_field(FreeChunk, _size, size_t) \
+ nonstatic_field(FreeChunk, _next, FreeChunk*) \
+ nonstatic_field(FreeChunk, _prev, FreeChunk*) \
+ nonstatic_field(FreeList<FreeChunk>, _size, size_t) \
+ nonstatic_field(FreeList<Metablock>, _size, size_t) \
+ nonstatic_field(FreeList<FreeChunk>, _count, ssize_t) \
+ nonstatic_field(FreeList<Metablock>, _count, ssize_t) \
+ nonstatic_field(MetablockTreeDictionary, _total_size, size_t)
/* NOTE that we do not use the last_entry() macro here; it is used */
/* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must */
@@ -2080,7 +2090,24 @@ typedef TwoOopHashtable<Symbol*, mtClass> SymbolTwoOopHashtable;
declare_toplevel_type(Universe) \
declare_toplevel_type(vframeArray) \
declare_toplevel_type(vframeArrayElement) \
- declare_toplevel_type(Annotations*)
+ declare_toplevel_type(Annotations*) \
+ \
+ /***************/ \
+ /* Miscellaneous types */ \
+ /***************/ \
+ \
+ /* freelist */ \
+ declare_toplevel_type(FreeChunk*) \
+ declare_toplevel_type(Metablock*) \
+ declare_toplevel_type(FreeBlockDictionary<FreeChunk>*) \
+ declare_toplevel_type(FreeList<FreeChunk>*) \
+ declare_toplevel_type(FreeList<FreeChunk>) \
+ declare_toplevel_type(FreeBlockDictionary<Metablock>*) \
+ declare_toplevel_type(FreeList<Metablock>*) \
+ declare_toplevel_type(FreeList<Metablock>) \
+ declare_toplevel_type(MetablockTreeDictionary*) \
+ declare_type(MetablockTreeDictionary, FreeBlockDictionary<Metablock>) \
+ declare_type(MetablockTreeDictionary, FreeBlockDictionary<Metablock>)
/* NOTE that we do not use the last_entry() macro here; it is used */
diff --git a/src/share/vm/services/attachListener.cpp b/src/share/vm/services/attachListener.cpp
index 983f3f95f..80dfd7eff 100644
--- a/src/share/vm/services/attachListener.cpp
+++ b/src/share/vm/services/attachListener.cpp
@@ -404,6 +404,8 @@ static AttachOperationFunctionInfo funcs[] = {
static void attach_listener_thread_entry(JavaThread* thread, TRAPS) {
os::set_priority(thread, NearMaxPriority);
+ thread->record_stack_base_and_size();
+
if (AttachListener::pd_init() != 0) {
return;
}
diff --git a/src/share/vm/services/memBaseline.cpp b/src/share/vm/services/memBaseline.cpp
index 5b8297516..57bdb0138 100644
--- a/src/share/vm/services/memBaseline.cpp
+++ b/src/share/vm/services/memBaseline.cpp
@@ -40,6 +40,7 @@ MemType2Name MemBaseline::MemType2NameMap[NUMBER_OF_MEMORY_TYPE] = {
{mtSymbol, "Symbol"},
{mtNMT, "Memory Tracking"},
{mtChunk, "Pooled Free Chunks"},
+ {mtClassShared,"Shared spaces for classes"},
{mtNone, "Unknown"} // It can happen when type tagging records are lagging
// behind
};
@@ -55,6 +56,7 @@ MemBaseline::MemBaseline() {
_malloc_cs = NULL;
_vm_cs = NULL;
+ _vm_map = NULL;
_number_of_classes = 0;
_number_of_threads = 0;
@@ -72,6 +74,11 @@ void MemBaseline::clear() {
_vm_cs = NULL;
}
+ if (_vm_map != NULL) {
+ delete _vm_map;
+ _vm_map = NULL;
+ }
+
reset();
}
@@ -85,6 +92,7 @@ void MemBaseline::reset() {
if (_malloc_cs != NULL) _malloc_cs->clear();
if (_vm_cs != NULL) _vm_cs->clear();
+ if (_vm_map != NULL) _vm_map->clear();
for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) {
_malloc_data[index].clear();
@@ -94,39 +102,33 @@ void MemBaseline::reset() {
}
MemBaseline::~MemBaseline() {
- if (_malloc_cs != NULL) {
- delete _malloc_cs;
- }
-
- if (_vm_cs != NULL) {
- delete _vm_cs;
- }
+ clear();
}
// baseline malloc'd memory records, generate overall summary and summaries by
// memory types
bool MemBaseline::baseline_malloc_summary(const MemPointerArray* malloc_records) {
- MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records);
- MemPointerRecord* mptr = (MemPointerRecord*)mItr.current();
+ MemPointerArrayIteratorImpl malloc_itr((MemPointerArray*)malloc_records);
+ MemPointerRecord* malloc_ptr = (MemPointerRecord*)malloc_itr.current();
size_t used_arena_size = 0;
int index;
- while (mptr != NULL) {
- index = flag2index(FLAGS_TO_MEMORY_TYPE(mptr->flags()));
- size_t size = mptr->size();
+ while (malloc_ptr != NULL) {
+ index = flag2index(FLAGS_TO_MEMORY_TYPE(malloc_ptr->flags()));
+ size_t size = malloc_ptr->size();
_total_malloced += size;
_malloc_data[index].inc(size);
- if (MemPointerRecord::is_arena_record(mptr->flags())) {
+ if (MemPointerRecord::is_arena_record(malloc_ptr->flags())) {
// see if arena size record present
- MemPointerRecord* next_p = (MemPointerRecordEx*)mItr.peek_next();
- if (MemPointerRecord::is_arena_size_record(next_p->flags())) {
- assert(next_p->is_size_record_of_arena(mptr), "arena records do not match");
- size = next_p->size();
+ MemPointerRecord* next_malloc_ptr = (MemPointerRecordEx*)malloc_itr.peek_next();
+ if (MemPointerRecord::is_arena_size_record(next_malloc_ptr->flags())) {
+ assert(next_malloc_ptr->is_size_record_of_arena(malloc_ptr), "arena records do not match");
+ size = next_malloc_ptr->size();
_arena_data[index].inc(size);
used_arena_size += size;
- mItr.next();
+ malloc_itr.next();
}
}
- mptr = (MemPointerRecordEx*)mItr.next();
+ malloc_ptr = (MemPointerRecordEx*)malloc_itr.next();
}
// substract used arena size to get size of arena chunk in free list
@@ -142,20 +144,23 @@ bool MemBaseline::baseline_malloc_summary(const MemPointerArray* malloc_records)
// baseline mmap'd memory records, generate overall summary and summaries by
// memory types
bool MemBaseline::baseline_vm_summary(const MemPointerArray* vm_records) {
- MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records);
- VMMemRegion* vptr = (VMMemRegion*)vItr.current();
+ MemPointerArrayIteratorImpl vm_itr((MemPointerArray*)vm_records);
+ VMMemRegion* vm_ptr = (VMMemRegion*)vm_itr.current();
int index;
- while (vptr != NULL) {
- index = flag2index(FLAGS_TO_MEMORY_TYPE(vptr->flags()));
-
+ while (vm_ptr != NULL) {
+ if (vm_ptr->is_reserved_region()) {
+ index = flag2index(FLAGS_TO_MEMORY_TYPE(vm_ptr->flags()));
// we use the number of thread stack to count threads
- if (IS_MEMORY_TYPE(vptr->flags(), mtThreadStack)) {
+ if (IS_MEMORY_TYPE(vm_ptr->flags(), mtThreadStack)) {
_number_of_threads ++;
}
- _total_vm_reserved += vptr->reserved_size();
- _total_vm_committed += vptr->committed_size();
- _vm_data[index].inc(vptr->reserved_size(), vptr->committed_size());
- vptr = (VMMemRegion*)vItr.next();
+ _total_vm_reserved += vm_ptr->size();
+ _vm_data[index].inc(vm_ptr->size(), 0);
+ } else {
+ _total_vm_committed += vm_ptr->size();
+ _vm_data[index].inc(0, vm_ptr->size());
+ }
+ vm_ptr = (VMMemRegion*)vm_itr.next();
}
return true;
}
@@ -165,41 +170,57 @@ bool MemBaseline::baseline_vm_summary(const MemPointerArray* vm_records) {
bool MemBaseline::baseline_malloc_details(const MemPointerArray* malloc_records) {
assert(MemTracker::track_callsite(), "detail tracking is off");
- MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records);
- MemPointerRecordEx* mptr = (MemPointerRecordEx*)mItr.current();
- MallocCallsitePointer mp;
+ MemPointerArrayIteratorImpl malloc_itr(const_cast<MemPointerArray*>(malloc_records));
+ MemPointerRecordEx* malloc_ptr = (MemPointerRecordEx*)malloc_itr.current();
+ MallocCallsitePointer malloc_callsite;
+ // initialize malloc callsite array
if (_malloc_cs == NULL) {
_malloc_cs = new (std::nothrow) MemPointerArrayImpl<MallocCallsitePointer>(64);
// out of native memory
- if (_malloc_cs == NULL) {
+ if (_malloc_cs == NULL || _malloc_cs->out_of_memory()) {
return false;
}
} else {
_malloc_cs->clear();
}
+ MemPointerArray* malloc_data = const_cast<MemPointerArray*>(malloc_records);
+
+ // sort into callsite pc order. Details are aggregated by callsites
+ malloc_data->sort((FN_SORT)malloc_sort_by_pc);
+ bool ret = true;
+
// baseline memory that is totaled over 1 KB
- while (mptr != NULL) {
- if (!MemPointerRecord::is_arena_size_record(mptr->flags())) {
+ while (malloc_ptr != NULL) {
+ if (!MemPointerRecord::is_arena_size_record(malloc_ptr->flags())) {
// skip thread stacks
- if (!IS_MEMORY_TYPE(mptr->flags(), mtThreadStack)) {
- if (mp.addr() != mptr->pc()) {
- if ((mp.amount()/K) > 0) {
- if (!_malloc_cs->append(&mp)) {
- return false;
+ if (!IS_MEMORY_TYPE(malloc_ptr->flags(), mtThreadStack)) {
+ if (malloc_callsite.addr() != malloc_ptr->pc()) {
+ if ((malloc_callsite.amount()/K) > 0) {
+ if (!_malloc_cs->append(&malloc_callsite)) {
+ ret = false;
+ break;
}
}
- mp = MallocCallsitePointer(mptr->pc());
+ malloc_callsite = MallocCallsitePointer(malloc_ptr->pc());
}
- mp.inc(mptr->size());
+ malloc_callsite.inc(malloc_ptr->size());
}
}
- mptr = (MemPointerRecordEx*)mItr.next();
+ malloc_ptr = (MemPointerRecordEx*)malloc_itr.next();
}
- if (mp.addr() != 0 && (mp.amount()/K) > 0) {
- if (!_malloc_cs->append(&mp)) {
+ // restore to address order. Snapshot malloc data is maintained in memory
+ // address order.
+ malloc_data->sort((FN_SORT)malloc_sort_by_addr);
+
+ if (!ret) {
+ return false;
+ }
+ // deal with last record
+ if (malloc_callsite.addr() != 0 && (malloc_callsite.amount()/K) > 0) {
+ if (!_malloc_cs->append(&malloc_callsite)) {
return false;
}
}
@@ -210,34 +231,106 @@ bool MemBaseline::baseline_malloc_details(const MemPointerArray* malloc_records)
bool MemBaseline::baseline_vm_details(const MemPointerArray* vm_records) {
assert(MemTracker::track_callsite(), "detail tracking is off");
- VMCallsitePointer vp;
- MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records);
- VMMemRegionEx* vptr = (VMMemRegionEx*)vItr.current();
+ VMCallsitePointer vm_callsite;
+ VMCallsitePointer* cur_callsite = NULL;
+ MemPointerArrayIteratorImpl vm_itr((MemPointerArray*)vm_records);
+ VMMemRegionEx* vm_ptr = (VMMemRegionEx*)vm_itr.current();
+
+ // initialize virtual memory map array
+ if (_vm_map == NULL) {
+ _vm_map = new (std::nothrow) MemPointerArrayImpl<VMMemRegionEx>(vm_records->length());
+ if (_vm_map == NULL || _vm_map->out_of_memory()) {
+ return false;
+ }
+ } else {
+ _vm_map->clear();
+ }
+ // initialize virtual memory callsite array
if (_vm_cs == NULL) {
_vm_cs = new (std::nothrow) MemPointerArrayImpl<VMCallsitePointer>(64);
- if (_vm_cs == NULL) {
+ if (_vm_cs == NULL || _vm_cs->out_of_memory()) {
return false;
}
} else {
_vm_cs->clear();
}
- while (vptr != NULL) {
- if (vp.addr() != vptr->pc()) {
- if (!_vm_cs->append(&vp)) {
+ // consolidate virtual memory data
+ VMMemRegionEx* reserved_rec = NULL;
+ VMMemRegionEx* committed_rec = NULL;
+
+ // vm_ptr is coming in increasing base address order
+ while (vm_ptr != NULL) {
+ if (vm_ptr->is_reserved_region()) {
+ // consolidate reserved memory regions for virtual memory map.
+ // The criteria for consolidation is:
+ // 1. two adjacent reserved memory regions
+ // 2. belong to the same memory type
+ // 3. reserved from the same callsite
+ if (reserved_rec == NULL ||
+ reserved_rec->base() + reserved_rec->size() != vm_ptr->addr() ||
+ FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) != FLAGS_TO_MEMORY_TYPE(vm_ptr->flags()) ||
+ reserved_rec->pc() != vm_ptr->pc()) {
+ if (!_vm_map->append(vm_ptr)) {
return false;
}
- vp = VMCallsitePointer(vptr->pc());
+ // inserted reserved region, we need the pointer to the element in virtual
+ // memory map array.
+ reserved_rec = (VMMemRegionEx*)_vm_map->at(_vm_map->length() - 1);
+ } else {
+ reserved_rec->expand_region(vm_ptr->addr(), vm_ptr->size());
}
- vp.inc(vptr->size(), vptr->committed_size());
- vptr = (VMMemRegionEx*)vItr.next();
- }
- if (vp.addr() != 0) {
- if (!_vm_cs->append(&vp)) {
+
+ if (cur_callsite != NULL && !_vm_cs->append(cur_callsite)) {
return false;
}
+ vm_callsite = VMCallsitePointer(vm_ptr->pc());
+ cur_callsite = &vm_callsite;
+ vm_callsite.inc(vm_ptr->size(), 0);
+ } else {
+ // consolidate committed memory regions for virtual memory map
+ // The criteria are:
+ // 1. two adjacent committed memory regions
+ // 2. committed from the same callsite
+ if (committed_rec == NULL ||
+ committed_rec->base() + committed_rec->size() != vm_ptr->addr() ||
+ committed_rec->pc() != vm_ptr->pc()) {
+ if (!_vm_map->append(vm_ptr)) {
+ return false;
+ }
+ committed_rec = (VMMemRegionEx*)_vm_map->at(_vm_map->length() - 1);
+ } else {
+ committed_rec->expand_region(vm_ptr->addr(), vm_ptr->size());
+ }
+ vm_callsite.inc(0, vm_ptr->size());
+ }
+ vm_ptr = (VMMemRegionEx*)vm_itr.next();
}
+ // deal with last record
+ if (cur_callsite != NULL && !_vm_cs->append(cur_callsite)) {
+ return false;
+ }
+
+ // sort it into callsite pc order. Details are aggregated by callsites
+ _vm_cs->sort((FN_SORT)bl_vm_sort_by_pc);
+
+ // walk the array to consolidate record by pc
+ MemPointerArrayIteratorImpl itr(_vm_cs);
+ VMCallsitePointer* callsite_rec = (VMCallsitePointer*)itr.current();
+ VMCallsitePointer* next_rec = (VMCallsitePointer*)itr.next();
+ while (next_rec != NULL) {
+ assert(callsite_rec != NULL, "Sanity check");
+ if (next_rec->addr() == callsite_rec->addr()) {
+ callsite_rec->inc(next_rec->reserved_amount(), next_rec->committed_amount());
+ itr.remove();
+ next_rec = (VMCallsitePointer*)itr.current();
+ } else {
+ callsite_rec = next_rec;
+ next_rec = (VMCallsitePointer*)itr.next();
+ }
+ }
+
return true;
}
@@ -251,12 +344,8 @@ bool MemBaseline::baseline(MemSnapshot& snapshot, bool summary_only) {
_number_of_classes = SystemDictionary::number_of_classes();
if (!summary_only && MemTracker::track_callsite() && _baselined) {
- ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_pc);
- ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_pc);
_baselined = baseline_malloc_details(snapshot._alloc_ptrs) &&
baseline_vm_details(snapshot._vm_ptrs);
- ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_addr);
- ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_addr);
}
return _baselined;
}
@@ -278,7 +367,7 @@ const char* MemBaseline::type2name(MEMFLAGS type) {
return MemType2NameMap[index]._name;
}
}
- assert(false, "no type");
+ assert(false, err_msg("bad type %x", type));
return NULL;
}
@@ -341,13 +430,6 @@ int MemBaseline::bl_malloc_sort_by_pc(const void* p1, const void* p2) {
return UNSIGNED_COMPARE(mp1->addr(), mp2->addr());
}
-// sort snapshot mmap'd records in callsite pc order
-int MemBaseline::vm_sort_by_pc(const void* p1, const void* p2) {
- assert(MemTracker::track_callsite(),"Just check");
- const VMMemRegionEx* mp1 = (const VMMemRegionEx*)p1;
- const VMMemRegionEx* mp2 = (const VMMemRegionEx*)p2;
- return UNSIGNED_COMPARE(mp1->pc(), mp2->pc());
-}
// sort baselined mmap'd records in size (reserved size) order
int MemBaseline::bl_vm_sort_by_size(const void* p1, const void* p2) {
@@ -376,12 +458,3 @@ int MemBaseline::malloc_sort_by_addr(const void* p1, const void* p2) {
return delta;
}
-// sort snapshot mmap'd records in memory block address order
-int MemBaseline::vm_sort_by_addr(const void* p1, const void* p2) {
- assert(MemTracker::is_on(), "Just check");
- const VMMemRegion* mp1 = (const VMMemRegion*)p1;
- const VMMemRegion* mp2 = (const VMMemRegion*)p2;
- int delta = UNSIGNED_COMPARE(mp1->addr(), mp2->addr());
- assert(delta != 0, "dup pointer");
- return delta;
-}
diff --git a/src/share/vm/services/memBaseline.hpp b/src/share/vm/services/memBaseline.hpp
index 2affa2b96..5f98e30d4 100644
--- a/src/share/vm/services/memBaseline.hpp
+++ b/src/share/vm/services/memBaseline.hpp
@@ -320,6 +320,8 @@ class MemBaseline : public _ValueObj {
// only available when detail tracking is on.
MemPointerArray* _malloc_cs;
MemPointerArray* _vm_cs;
+ // virtual memory map
+ MemPointerArray* _vm_map;
private:
static MemType2Name MemType2NameMap[NUMBER_OF_MEMORY_TYPE];
@@ -432,9 +434,6 @@ class MemBaseline : public _ValueObj {
static int malloc_sort_by_pc(const void* p1, const void* p2);
static int malloc_sort_by_addr(const void* p1, const void* p2);
- static int vm_sort_by_pc(const void* p1, const void* p2);
- static int vm_sort_by_addr(const void* p1, const void* p2);
-
private:
// sorting functions for baselined records
static int bl_malloc_sort_by_size(const void* p1, const void* p2);
diff --git a/src/share/vm/services/memPtr.cpp b/src/share/vm/services/memPtr.cpp
index d2a18765a..5d0fbf5bf 100644
--- a/src/share/vm/services/memPtr.cpp
+++ b/src/share/vm/services/memPtr.cpp
@@ -40,35 +40,3 @@ jint SequenceGenerator::next() {
return seq;
}
-
-
-bool VMMemRegion::contains(const VMMemRegion* mr) const {
- assert(base() != 0, "Sanity check");
- assert(size() != 0 || committed_size() != 0,
- "Sanity check");
- address base_addr = base();
- address end_addr = base_addr +
- (is_reserve_record()? reserved_size(): committed_size());
- if (mr->is_reserve_record()) {
- if (mr->base() == base_addr && mr->size() == size()) {
- // the same range
- return true;
- }
- return false;
- } else if (mr->is_commit_record() || mr->is_uncommit_record()) {
- assert(mr->base() != 0 && mr->committed_size() > 0,
- "bad record");
- return (mr->base() >= base_addr &&
- (mr->base() + mr->committed_size()) <= end_addr);
- } else if (mr->is_type_tagging_record()) {
- assert(mr->base() != NULL, "Sanity check");
- return (mr->base() >= base_addr && mr->base() < end_addr);
- } else if (mr->is_release_record()) {
- assert(mr->base() != 0 && mr->size() > 0,
- "bad record");
- return (mr->base() == base_addr && mr->size() == size());
- } else {
- ShouldNotReachHere();
- return false;
- }
-}
diff --git a/src/share/vm/services/memPtr.hpp b/src/share/vm/services/memPtr.hpp
index bef49b2b4..0618a3c80 100644
--- a/src/share/vm/services/memPtr.hpp
+++ b/src/share/vm/services/memPtr.hpp
@@ -291,6 +291,26 @@ public:
inline bool is_type_tagging_record() const {
return is_virtual_memory_type_record(_flags);
}
+
+ // Returns true when this record and 'other' describe exactly the same
+ // memory block, i.e. both the start address and the size are equal.
+ // Record flags are not part of the comparison.
+ inline bool is_same_region(const MemPointerRecord* other) const {
+ return (addr() == other->addr() && size() == other->size());
+ }
+
+ // Returns true when the address range of 'other' lies entirely inside
+ // this record's range. Forwards other's addr() and size() to the
+ // (address, size_t) overload.
+ inline bool contains_region(const MemPointerRecord* other) const {
+ return contains_region(other->addr(), other->size());
+ }
+
+ // Returns true when the range starting at 'add' and spanning 'sz' bytes
+ // lies entirely inside this record's range. Both comparisons are
+ // non-strict, so a range identical to this region counts as contained.
+ // NOTE(review): add + sz and addr() + size() are not checked for
+ // wrap-around - confirm callers never pass ranges at the very top of the
+ // address space.
+ inline bool contains_region(address add, size_t sz) const {
+ return (addr() <= add && addr() + size() >= add + sz);
+ }
+
+ // Returns true when 'add' falls inside this record's range. The start
+ // address is included; the end address (addr() + size()) is excluded.
+ inline bool contains_address(address add) const {
+ return (addr() <= add && addr() + size() > add);
+ }
};
// MemPointerRecordEx also records callsite pc, from where
@@ -321,66 +341,32 @@ class MemPointerRecordEx : public MemPointerRecord {
}
};
-// a virtual memory region
+// a virtual memory region. The region can represent a reserved
+// virtual memory region or a committed memory region
class VMMemRegion : public MemPointerRecord {
- private:
- // committed size
- size_t _committed_size;
-
public:
- VMMemRegion(): _committed_size(0) { }
+ VMMemRegion() { }
void init(const MemPointerRecord* mp) {
- assert(mp->is_vm_pointer(), "not virtual memory pointer");
+ assert(mp->is_vm_pointer(), "Sanity check");
_addr = mp->addr();
- if (mp->is_commit_record() || mp->is_uncommit_record()) {
- _committed_size = mp->size();
- set_size(_committed_size);
- } else {
set_size(mp->size());
- _committed_size = 0;
- }
set_flags(mp->flags());
}
VMMemRegion& operator=(const VMMemRegion& other) {
MemPointerRecord::operator=(other);
- _committed_size = other.committed_size();
return *this;
}
- inline bool is_reserve_record() const {
- return is_virtual_memory_reserve_record(flags());
- }
-
- inline bool is_release_record() const {
- return is_virtual_memory_release_record(flags());
- }
-
- // resize reserved VM range
- inline void set_reserved_size(size_t new_size) {
- assert(new_size >= committed_size(), "resize");
- set_size(new_size);
- }
-
- inline void commit(size_t size) {
- _committed_size += size;
+ inline bool is_reserved_region() const {
+ return is_allocation_record();
}
- inline void uncommit(size_t size) {
- if (_committed_size >= size) {
- _committed_size -= size;
- } else {
- _committed_size = 0;
- }
+ inline bool is_committed_region() const {
+ return is_commit_record();
}
- /*
- * if this virtual memory range covers whole range of
- * the other VMMemRegion
- */
- bool contains(const VMMemRegion* mr) const;
-
/* base address of this virtual memory range */
inline address base() const {
return addr();
@@ -391,13 +377,28 @@ public:
set_flags(flags() | (f & mt_masks));
}
- // release part of memory range
- inline void partial_release(address add, size_t sz) {
- assert(add >= addr() && add < addr() + size(), "not valid address");
- // for now, it can partially release from the both ends,
- // but not in the middle
+ // Expand this region so that it also covers the specified range.
+ // The range has to be directly adjacent to either end of this region:
+ //   - it ends exactly at base(): the region grows downward, so the start
+ //     address moves to 'addr' and the size grows by 'sz';
+ //   - it starts exactly at base() + size(): the region grows upward and
+ //     only the size changes.
+ // Adjacency is verified by assert only; the size is increased by 'sz' in
+ // either case.
+ // Note: the parameter is named 'addr', which hides the addr() accessor
+ // inside this method; the region's own start is therefore read via base().
+ void expand_region(address addr, size_t sz) {
+ if (addr < base()) {
+ // range precedes this region: must end where this region starts
+ assert(addr + sz == base(), "Sanity check");
+ _addr = addr;
+ set_size(size() + sz);
+ } else {
+ // range follows this region: must start where this region ends
+ assert(base() + size() == addr, "Sanity check");
+ set_size(size() + sz);
+ }
+ }
+
+ // exclude the specified address range from this region.
+ // The excluded memory range has to be on either end of this memory
+ // region.
+ inline void exclude_region(address add, size_t sz) {
+ assert(is_reserved_region() || is_committed_region(), "Sanity check");
+ assert(addr() != NULL && size() != 0, "Sanity check");
+ assert(add >= addr() && add < addr() + size(), "Sanity check");
assert(add == addr() || (add + sz) == (addr() + size()),
- "release in the middle");
+ "exclude in the middle");
if (add == addr()) {
set_addr(add + sz);
set_size(size() - sz);
@@ -405,16 +406,6 @@ public:
set_size(size() - sz);
}
}
-
- // the committed size of the virtual memory block
- inline size_t committed_size() const {
- return _committed_size;
- }
-
- // the reserved size of the virtual memory block
- inline size_t reserved_size() const {
- return size();
- }
};
class VMMemRegionEx : public VMMemRegion {
diff --git a/src/share/vm/services/memRecorder.cpp b/src/share/vm/services/memRecorder.cpp
index 33db875af..5ec865af3 100644
--- a/src/share/vm/services/memRecorder.cpp
+++ b/src/share/vm/services/memRecorder.cpp
@@ -31,14 +31,19 @@
#include "services/memTracker.hpp"
MemPointer* SequencedRecordIterator::next_record() {
- MemPointer* itr_cur = _itr.current();
- if (itr_cur == NULL) return NULL;
- MemPointer* itr_next = _itr.next();
+ MemPointerRecord* itr_cur = (MemPointerRecord*)_itr.current();
+ if (itr_cur == NULL) {
+ return itr_cur;
+ }
+
+ MemPointerRecord* itr_next = (MemPointerRecord*)_itr.next();
- while (itr_next != NULL &&
- same_kind((MemPointerRecord*)itr_cur, (MemPointerRecord*)itr_next)) {
+ // don't collapse virtual memory records
+ while (itr_next != NULL && !itr_cur->is_vm_pointer() &&
+ !itr_next->is_vm_pointer() &&
+ same_kind(itr_cur, itr_next)) {
itr_cur = itr_next;
- itr_next = _itr.next();
+ itr_next = (MemPointerRecord*)_itr.next();
}
return itr_cur;
diff --git a/src/share/vm/services/memRecorder.hpp b/src/share/vm/services/memRecorder.hpp
index 754cbd6c3..2afeeb09b 100644
--- a/src/share/vm/services/memRecorder.hpp
+++ b/src/share/vm/services/memRecorder.hpp
@@ -188,6 +188,7 @@ class SequencedRecordIterator : public MemPointerArrayIterator {
// Test if the two records are the same kind: the same memory block and allocation
// type.
inline bool same_kind(const MemPointerRecord* p1, const MemPointerRecord* p2) const {
+ assert(!p1->is_vm_pointer() && !p2->is_vm_pointer(), "malloc pointer only");
return (p1->addr() == p2->addr() &&
(p1->flags() &MemPointerRecord::tag_masks) ==
(p2->flags() & MemPointerRecord::tag_masks));
diff --git a/src/share/vm/services/memReporter.cpp b/src/share/vm/services/memReporter.cpp
index 783d951de..602ac1c08 100644
--- a/src/share/vm/services/memReporter.cpp
+++ b/src/share/vm/services/memReporter.cpp
@@ -51,6 +51,7 @@ void BaselineReporter::report_baseline(const MemBaseline& baseline, bool summary
report_summaries(baseline);
if (!summary_only && MemTracker::track_callsite()) {
+ report_virtual_memory_map(baseline);
report_callsites(baseline);
}
_outputer.done();
@@ -74,6 +75,25 @@ void BaselineReporter::report_summaries(const MemBaseline& baseline) {
_outputer.done_category_summary();
}
+// Reports the baseline's virtual memory map through the outputer. Every
+// record in the baseline's _vm_map array is emitted either as a reserved
+// region (together with its memory type) or as a committed region, with the
+// region size converted by amount_in_current_scale().
+void BaselineReporter::report_virtual_memory_map(const MemBaseline& baseline) {
+  _outputer.start_virtual_memory_map();
+
+  MemBaseline* bl = const_cast<MemBaseline*>(&baseline);
+  MemPointerArrayIteratorImpl map_itr = MemPointerArrayIteratorImpl(bl->_vm_map);
+
+  for (VMMemRegionEx* region = (VMMemRegionEx*)map_itr.current();
+       region != NULL;
+       region = (VMMemRegionEx*)map_itr.next()) {
+    // values shared by both kinds of output line
+    address region_start = region->base();
+    address region_end   = region_start + region->size();
+    size_t  scaled_size  = amount_in_current_scale(region->size());
+
+    if (!region->is_reserved_region()) {
+      // anything that is not a reserved region is reported as committed
+      _outputer.committed_memory_region(region_start, region_end,
+        scaled_size, region->pc());
+    } else {
+      _outputer.reserved_memory_region(FLAGS_TO_MEMORY_TYPE(region->flags()),
+        region_start, region_end, scaled_size, region->pc());
+    }
+  }
+
+  _outputer.done_virtual_memory_map();
+}
+
void BaselineReporter::report_callsites(const MemBaseline& baseline) {
_outputer.start_callsite();
MemBaseline* pBL = const_cast<MemBaseline*>(&baseline);
@@ -324,6 +344,40 @@ void BaselineTTYOutputer::done_category_summary() {
_output->print_cr(" ");
}
+
+// Prints the heading line that opens the virtual memory map section.
+void BaselineTTYOutputer::start_virtual_memory_map() {
+ _output->print_cr("Virtual memory map:");
+}
+
+// Prints one reserved region of the virtual memory map, preceded by a
+// separator line.
+//   type       memory type of the region; printed by name via
+//              MemBaseline::type2name()
+//   base, end  start and end address of the region
+//   size       region size; printed followed by the unit string for _scale
+//   pc         callsite of the reservation; when it resolves to a function
+//              name, a second "from [name+offset]" line is printed
+void BaselineTTYOutputer::reserved_memory_region(MEMFLAGS type, address base, address end,
+                                                 size_t size, address pc) {
+  const char* unit = memory_unit(_scale);
+  char buf[128];
+  int offset;
+  _output->print_cr(" ");
+  // size is a size_t; SIZE_FORMAT keeps the conversion specifier as wide as
+  // the argument on both 32-bit and 64-bit builds, which "%d" did not.
+  _output->print_cr("[" PTR_FORMAT " - " PTR_FORMAT "] reserved " SIZE_FORMAT "%s for %s", base, end, size, unit,
+    MemBaseline::type2name(type));
+  if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) {
+    _output->print_cr("\t\tfrom [%s+0x%x]", buf, offset);
+  }
+}
+
+// Prints one committed region of the virtual memory map on a single,
+// tab-indented line.
+//   base, end  start and end address of the region
+//   size       region size; printed followed by the unit string for _scale
+//   pc         callsite of the commit; appended as " from [name+offset]"
+//              when it resolves to a function name
+void BaselineTTYOutputer::committed_memory_region(address base, address end, size_t size, address pc) {
+  const char* unit = memory_unit(_scale);
+  char buf[128];
+  int offset;
+  // size is a size_t; SIZE_FORMAT keeps the conversion specifier as wide as
+  // the argument on both 32-bit and 64-bit builds, which "%d" did not.
+  _output->print("\t[" PTR_FORMAT " - " PTR_FORMAT "] committed " SIZE_FORMAT "%s", base, end, size, unit);
+  if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) {
+    _output->print_cr(" from [%s+0x%x]", buf, offset);
+  } else {
+    // The line was started with print(); terminate it even when the callsite
+    // cannot be resolved, otherwise the next region is appended to this line.
+    _output->print_cr(" ");
+  }
+}
+
+// Closes the virtual memory map section with a separator line.
+void BaselineTTYOutputer::done_virtual_memory_map() {
+ _output->print_cr(" ");
+}
+
+
+
void BaselineTTYOutputer::start_callsite() {
_output->print_cr("Details:");
_output->print_cr(" ");
@@ -337,7 +391,7 @@ void BaselineTTYOutputer::malloc_callsite(address pc, size_t malloc_amt,
size_t malloc_count) {
if (malloc_amt > 0) {
const char* unit = memory_unit(_scale);
- char buf[64];
+ char buf[128];
int offset;
if (pc == 0) {
_output->print("[BOOTSTRAP]%18s", " ");
@@ -357,7 +411,7 @@ void BaselineTTYOutputer::virtual_memory_callsite(address pc, size_t reserved_am
size_t committed_amt) {
if (reserved_amt > 0) {
const char* unit = memory_unit(_scale);
- char buf[64];
+ char buf[128];
int offset;
if (pc == 0) {
_output->print("[BOOTSTRAP]%18s", " ");
@@ -502,7 +556,7 @@ void BaselineTTYOutputer::diff_malloc_callsite(address pc,
int malloc_diff, int malloc_count_diff) {
if (malloc_diff != 0) {
const char* unit = memory_unit(_scale);
- char buf[64];
+ char buf[128];
int offset;
if (pc == 0) {
_output->print_cr("[BOOTSTRAP]%18s", " ");
diff --git a/src/share/vm/services/memReporter.hpp b/src/share/vm/services/memReporter.hpp
index 4595b2797..7a5372d49 100644
--- a/src/share/vm/services/memReporter.hpp
+++ b/src/share/vm/services/memReporter.hpp
@@ -93,6 +93,11 @@ class BaselineOutputer : public StackObj {
virtual void done_category_summary() = 0;
+ virtual void start_virtual_memory_map() = 0;
+ virtual void reserved_memory_region(MEMFLAGS type, address base, address end, size_t size, address pc) = 0;
+ virtual void committed_memory_region(address base, address end, size_t size, address pc) = 0;
+ virtual void done_virtual_memory_map() = 0;
+
/*
* Report callsite information
*/
@@ -136,6 +141,7 @@ class BaselineReporter : public StackObj {
private:
void report_summaries(const MemBaseline& baseline);
+ void report_virtual_memory_map(const MemBaseline& baseline);
void report_callsites(const MemBaseline& baseline);
void diff_summaries(const MemBaseline& cur, const MemBaseline& prev);
@@ -251,6 +257,13 @@ class BaselineTTYOutputer : public BaselineOutputer {
void done_category_summary();
+ // virtual memory map
+ void start_virtual_memory_map();
+ void reserved_memory_region(MEMFLAGS type, address base, address end, size_t size, address pc);
+ void committed_memory_region(address base, address end, size_t size, address pc);
+ void done_virtual_memory_map();
+
+
/*
* Report callsite information
*/
diff --git a/src/share/vm/services/memSnapshot.cpp b/src/share/vm/services/memSnapshot.cpp
index f86057694..20e1ce61f 100644
--- a/src/share/vm/services/memSnapshot.cpp
+++ b/src/share/vm/services/memSnapshot.cpp
@@ -31,6 +31,220 @@
#include "services/memSnapshot.hpp"
#include "services/memTracker.hpp"
+
+// Copy 'rec' into a VMMemRegionEx and hand the copy to insert().
+// Only allocation and commit records are accepted (asserted).
+// Returns the result of insert().
+bool VMMemPointerIterator::insert_record(MemPointerRecord* rec) {
+  VMMemRegionEx region;
+  assert(rec->is_allocation_record() || rec->is_commit_record(),
+    "Sanity check");
+  if (!MemTracker::track_callsite()) {
+    region.init(rec);
+  } else {
+    // with callsite tracking on, the record is read as a MemPointerRecordEx
+    region.init((MemPointerRecordEx*)rec);
+  }
+  return insert(&region);
+}
+
+// Copy 'rec' into a VMMemRegionEx and hand the copy to insert_after().
+// Only allocation and commit records are accepted (asserted).
+// Returns the result of insert_after().
+bool VMMemPointerIterator::insert_record_after(MemPointerRecord* rec) {
+  VMMemRegionEx region;
+  assert(rec->is_allocation_record() || rec->is_commit_record(),
+    "Sanity check");
+  if (!MemTracker::track_callsite()) {
+    region.init(rec);
+  } else {
+    // with callsite tracking on, the record is read as a MemPointerRecordEx
+    region.init((MemPointerRecordEx*)rec);
+  }
+  return insert_after(&region);
+}
+
+// Reserved regions are never consolidated with each other, since they may
+// be categorized in different types.
+// Adds the reserved region described by 'rec' through insert_record(),
+// unless the iterator's current record already describes the same region,
+// in which case 'rec' is a duplicate and nothing is inserted.
+bool VMMemPointerIterator::add_reserved_region(MemPointerRecord* rec) {
+  assert(rec->is_allocation_record(), "Sanity check");
+  VMMemRegion* existing = (VMMemRegion*)current();
+
+  // existing == NULL: we don't have anything yet, just insert
+  if (existing != NULL) {
+    assert(existing->is_reserved_region(), "Sanity check");
+    if (existing->is_same_region(rec)) {
+      // duplicated record
+      return true;
+    }
+    assert(existing->base() > rec->addr(), "Just check: locate()");
+    assert(rec->addr() + rec->size() <= existing->base(), "Can not overlap");
+  }
+  return insert_record(rec);
+}
+
+// we do consolidate committed regions
+//
+// Add the committed range described by 'rec'. The iterator is expected to
+// be on the reserved region that contains 'rec' (asserted below). The
+// committed regions that follow it are walked: a range that is already
+// covered is ignored, a range that touches an existing committed region is
+// merged into that region, anything else is inserted as a new record.
+// Returns true, or the result of insert_record() when a new record has to
+// be inserted.
+bool VMMemPointerIterator::add_committed_region(MemPointerRecord* rec) {
+ assert(rec->is_commit_record(), "Sanity check");
+ VMMemRegion* cur;
+ cur = (VMMemRegion*)current();
+ assert(cur->is_reserved_region() && cur->contains_region(rec),
+ "Sanity check");
+
+ // thread's native stack is always marked as "committed", ignore
+ // the "commit" operation for creating stack guard pages
+ if (FLAGS_TO_MEMORY_TYPE(cur->flags()) == mtThreadStack &&
+ FLAGS_TO_MEMORY_TYPE(rec->flags()) != mtThreadStack) {
+ return true;
+ }
+
+ // step past the reserved region and walk the committed regions after it
+ cur = (VMMemRegion*)next();
+ while (cur != NULL && cur->is_committed_region()) {
+ // duplicated commit records
+ if(cur->contains_region(rec)) {
+ return true;
+ }
+ if (cur->base() > rec->addr()) {
+ // cur starts above rec
+ // committed regions can not overlap
+ assert(rec->addr() + rec->size() <= cur->base(), "Can not overlap");
+ if (rec->addr() + rec->size() == cur->base()) {
+ // rec ends exactly where cur begins: merge rec into cur
+ cur->expand_region(rec->addr(), rec->size());
+ return true;
+ } else {
+ // there is a gap between rec and cur: rec becomes a record of its own
+ return insert_record(rec);
+ }
+ } else if (cur->base() + cur->size() == rec->addr()) {
+ // rec begins exactly where cur ends: merge rec into cur
+ cur->expand_region(rec->addr(), rec->size());
+ VMMemRegion* next_reg = (VMMemRegion*)next();
+ // see if we can consolidate next committed region
+ if (next_reg != NULL && next_reg->is_committed_region() &&
+ next_reg->base() == cur->base() + cur->size()) {
+ cur->expand_region(next_reg->base(), next_reg->size());
+ // next_reg is now covered by cur; presumably remove() drops the record
+ // at the iterator's position, i.e. next_reg - TODO confirm
+ remove();
+ }
+ return true;
+ }
+ cur = (VMMemRegion*)next();
+ }
+ // no committed region to merge with was found
+ return insert_record(rec);
+}
+
+// Remove the range described by the uncommit record 'rec' from the
+// committed regions. The iterator is expected to be on the reserved region
+// that contains 'rec' (asserted below). The committed regions that follow
+// it are walked until one that contains 'rec' is found; that region is
+// removed, trimmed at one end, or split in two.
+// Returns true, or the result of insert_record_after() when a committed
+// region has to be split.
+bool VMMemPointerIterator::remove_uncommitted_region(MemPointerRecord* rec) {
+ assert(rec->is_uncommit_record(), "sanity check");
+ VMMemRegion* cur;
+ cur = (VMMemRegion*)current();
+ assert(cur->is_reserved_region() && cur->contains_region(rec),
+ "Sanity check");
+ // thread's native stack is always marked as "committed", ignore
+ // the "uncommit" operation on a thread stack region when the record
+ // itself is not tagged as thread stack (stack guard pages)
+ if (FLAGS_TO_MEMORY_TYPE(cur->flags()) == mtThreadStack &&
+ FLAGS_TO_MEMORY_TYPE(rec->flags()) != mtThreadStack) {
+ return true;
+ }
+
+ // step past the reserved region and walk the committed regions after it
+ cur = (VMMemRegion*)next();
+ while (cur != NULL && cur->is_committed_region()) {
+ // cur starts at or above the end of rec, so no committed region covers
+ // rec: region already uncommitted, must be due to duplicated record
+ if (cur->addr() >= rec->addr() + rec->size()) {
+ break;
+ } else if (cur->contains_region(rec)) {
+ // uncommit whole region
+ if (cur->is_same_region(rec)) {
+ remove();
+ break;
+ } else if (rec->addr() == cur->addr() ||
+ rec->addr() + rec->size() == cur->addr() + cur->size()) {
+ // uncommitted from either end of current memory region.
+ cur->exclude_region(rec->addr(), rec->size());
+ break;
+ } else { // split the committed region and release the middle
+ address high_addr = cur->addr() + cur->size();
+ // first cut everything from the start of rec up to the old end of cur
+ size_t sz = high_addr - rec->addr();
+ cur->exclude_region(rec->addr(), sz);
+ // what lies above rec is re-added as a committed record of its own,
+ // carrying cur's flags (and cur's pc when callsites are tracked)
+ sz = high_addr - (rec->addr() + rec->size());
+ if (MemTracker::track_callsite()) {
+ MemPointerRecordEx tmp(rec->addr() + rec->size(), cur->flags(), sz,
+ ((VMMemRegionEx*)cur)->pc());
+ return insert_record_after(&tmp);
+ } else {
+ MemPointerRecord tmp(rec->addr() + rec->size(), cur->flags(), sz);
+ return insert_record_after(&tmp);
+ }
+ }
+ }
+ cur = (VMMemRegion*)next();
+ }
+
+ // we may not find committed record due to duplicated records
+ return true;
+}
+
+// Remove the range described by the release record 'rec' from the reserved
+// region the iterator is on; that region has to contain 'rec' (asserted
+// below). The reserved region is removed, trimmed at one end, or split in
+// two.
+// Returns true, or the result of insert_reserved_region() when the
+// reserved region has to be split.
+bool VMMemPointerIterator::remove_released_region(MemPointerRecord* rec) {
+ assert(rec->is_deallocation_record(), "Sanity check");
+ VMMemRegion* cur = (VMMemRegion*)current();
+ assert(cur->is_reserved_region() && cur->contains_region(rec),
+ "Sanity check");
+#ifdef ASSERT
+ VMMemRegion* next_reg = (VMMemRegion*)peek_next();
+ // should not have any committed memory in this reserved region
+ assert(next_reg == NULL || !next_reg->is_committed_region(), "Sanity check");
+#endif
+ if (rec->is_same_region(cur)) {
+ // the whole reserved region is released
+ remove();
+ } else if (rec->addr() == cur->addr() ||
+ rec->addr() + rec->size() == cur->addr() + cur->size()) {
+ // released region is at either end of this region
+ cur->exclude_region(rec->addr(), rec->size());
+ } else { // split the reserved region and release the middle
+ address high_addr = cur->addr() + cur->size();
+ // first cut everything from the start of rec up to the old end of cur
+ size_t sz = high_addr - rec->addr();
+ cur->exclude_region(rec->addr(), sz);
+ // what lies above rec is re-added as a reserved region of its own,
+ // carrying cur's flags (and cur's pc when callsites are tracked)
+ sz = high_addr - rec->addr() - rec->size();
+ if (MemTracker::track_callsite()) {
+ MemPointerRecordEx tmp(rec->addr() + rec->size(), cur->flags(), sz,
+ ((VMMemRegionEx*)cur)->pc());
+ return insert_reserved_region(&tmp);
+ } else {
+ MemPointerRecord tmp(rec->addr() + rec->size(), cur->flags(), sz);
+ return insert_reserved_region(&tmp);
+ }
+ }
+ return true;
+}
+
+// Advance the iterator past the 'commit' records that belong to the
+// previous reserved region, i.e. committed regions ending below the start
+// of 'rec', then insert 'rec' through insert_record().
+bool VMMemPointerIterator::insert_reserved_region(MemPointerRecord* rec) {
+  VMMemRegion* rgn = (VMMemRegion*)next();
+  for (;;) {
+    if (rgn == NULL || !rgn->is_committed_region()) {
+      break;
+    }
+    if (rgn->base() + rgn->size() >= rec->addr()) {
+      break;
+    }
+    rgn = (VMMemRegion*)next();
+  }
+  return insert_record(rec);
+}
+
+// Split the reserved region 'rgn' so that [new_rgn_addr, new_rgn_addr +
+// new_rgn_size) becomes a reserved region of its own. The new range has to
+// lie inside 'rgn' (asserted). Every region that is created carries rgn's
+// flags, and rgn's pc when callsites are tracked (NULL otherwise).
+// Returns the result of the last insert_reserved_region() call, or false
+// as soon as one of the insertions fails.
+bool VMMemPointerIterator::split_reserved_region(VMMemRegion* rgn, address new_rgn_addr, size_t new_rgn_size) {
+ assert(rgn->contains_region(new_rgn_addr, new_rgn_size), "Not fully contained");
+ address pc = (MemTracker::track_callsite() ? ((VMMemRegionEx*)rgn)->pc() : NULL);
+ if (rgn->base() == new_rgn_addr) { // new region is at the beginning of the region
+ size_t sz = rgn->size() - new_rgn_size;
+ // the original region becomes 'new' region
+ rgn->exclude_region(new_rgn_addr + new_rgn_size, sz);
+ // remaining becomes next region
+ MemPointerRecordEx next_rgn(new_rgn_addr + new_rgn_size, rgn->flags(), sz, pc);
+ return insert_reserved_region(&next_rgn);
+ } else if (rgn->base() + rgn->size() == new_rgn_addr + new_rgn_size) {
+ // new region is at the end of the region: cut it off the original
+ // region and insert it as the next region
+ rgn->exclude_region(new_rgn_addr, new_rgn_size);
+ MemPointerRecordEx next_rgn(new_rgn_addr, rgn->flags(), new_rgn_size, pc);
+ return insert_reserved_region(&next_rgn);
+ } else {
+ // the original region will be split into three
+ address rgn_high_addr = rgn->base() + rgn->size();
+ // first region
+ rgn->exclude_region(new_rgn_addr, (rgn_high_addr - new_rgn_addr));
+ // the second region is the new region
+ MemPointerRecordEx new_rgn(new_rgn_addr, rgn->flags(), new_rgn_size, pc);
+ if (!insert_reserved_region(&new_rgn)) return false;
+ // the remaining region
+ MemPointerRecordEx rem_rgn(new_rgn_addr + new_rgn_size, rgn->flags(),
+ rgn_high_addr - (new_rgn_addr + new_rgn_size), pc);
+ return insert_reserved_region(&rem_rgn);
+ }
+}
+
static int sort_in_seq_order(const void* p1, const void* p2) {
assert(p1 != NULL && p2 != NULL, "Sanity check");
const MemPointerRecord* mp1 = (MemPointerRecord*)p1;
@@ -61,11 +275,11 @@ bool StagingArea::init() {
}
-MemPointerArrayIteratorImpl StagingArea::virtual_memory_record_walker() {
+VMRecordIterator StagingArea::virtual_memory_record_walker() {
MemPointerArray* arr = vm_data();
// sort into seq number order
arr->sort((FN_SORT)sort_in_seq_order);
- return MemPointerArrayIteratorImpl(arr);
+ return VMRecordIterator(arr);
}
@@ -135,6 +349,8 @@ bool MemSnapshot::merge(MemRecorder* rec) {
return false;
}
} else {
+ // locate matched record and/or also position the iterator to proper
+ // location for this incoming record.
p2 = (MemPointerRecord*)malloc_staging_itr.locate(p1->addr());
// we have not seen this memory block, so just add to staging area
if (p2 == NULL) {
@@ -199,7 +415,7 @@ bool MemSnapshot::promote() {
MallocRecordIterator malloc_itr = _staging_area.malloc_record_walker();
bool promoted = false;
if (promote_malloc_records(&malloc_itr)) {
- MemPointerArrayIteratorImpl vm_itr = _staging_area.virtual_memory_record_walker();
+ VMRecordIterator vm_itr = _staging_area.virtual_memory_record_walker();
if (promote_virtual_memory_records(&vm_itr)) {
promoted = true;
}
@@ -218,7 +434,7 @@ bool MemSnapshot::promote_malloc_records(MemPointerArrayIterator* itr) {
matched_rec = (MemPointerRecord*)malloc_snapshot_itr.locate(new_rec->addr());
// found matched memory block
if (matched_rec != NULL && new_rec->addr() == matched_rec->addr()) {
- // snapshot already contains 'lived' records
+ // snapshot already contains 'live' records
assert(matched_rec->is_allocation_record() || matched_rec->is_arena_size_record(),
"Sanity check");
// update block states
@@ -277,87 +493,60 @@ bool MemSnapshot::promote_malloc_records(MemPointerArrayIterator* itr) {
bool MemSnapshot::promote_virtual_memory_records(MemPointerArrayIterator* itr) {
VMMemPointerIterator vm_snapshot_itr(_vm_ptrs);
MemPointerRecord* new_rec = (MemPointerRecord*)itr->current();
- VMMemRegionEx new_vm_rec;
- VMMemRegion* matched_rec;
+ VMMemRegion* reserved_rec;
while (new_rec != NULL) {
assert(new_rec->is_vm_pointer(), "Sanity check");
- if (MemTracker::track_callsite()) {
- new_vm_rec.init((MemPointerRecordEx*)new_rec);
- } else {
- new_vm_rec.init(new_rec);
- }
- matched_rec = (VMMemRegion*)vm_snapshot_itr.locate(new_rec->addr());
- if (matched_rec != NULL &&
- (matched_rec->contains(&new_vm_rec) || matched_rec->base() == new_vm_rec.base())) {
+
+ // locate a reserved region that contains the specified address, or
+ // the nearest reserved region has base address just above the specified
+ // address
+ reserved_rec = (VMMemRegion*)vm_snapshot_itr.locate(new_rec->addr());
+ if (reserved_rec != NULL && reserved_rec->contains_region(new_rec)) {
// snapshot can only have 'live' records
- assert(matched_rec->is_reserve_record(), "Sanity check");
- if (new_vm_rec.is_reserve_record() && matched_rec->base() == new_vm_rec.base()) {
- // resize reserved virtual memory range
- // resize has to cover committed area
- assert(new_vm_rec.size() >= matched_rec->committed_size(), "Sanity check");
- matched_rec->set_reserved_size(new_vm_rec.size());
- } else if (new_vm_rec.is_commit_record()) {
- // commit memory inside reserved memory range
- assert(new_vm_rec.committed_size() <= matched_rec->reserved_size(), "Sanity check");
- // thread stacks are marked committed, so we ignore 'commit' record for creating
- // stack guard pages
- if (FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) != mtThreadStack) {
- matched_rec->commit(new_vm_rec.committed_size());
- }
- } else if (new_vm_rec.is_uncommit_record()) {
- if (FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) == mtThreadStack) {
- // ignore 'uncommit' record from removing stack guard pages, uncommit
- // thread stack as whole
- if (matched_rec->committed_size() == new_vm_rec.committed_size()) {
- matched_rec->uncommit(new_vm_rec.committed_size());
+ assert(reserved_rec->is_reserved_region(), "Sanity check");
+ if (new_rec->is_allocation_record()) {
+ if (!reserved_rec->is_same_region(new_rec)) {
+ // only deal with split a bigger reserved region into smaller regions.
+ // So far, CDS is the only use case.
+ if (!vm_snapshot_itr.split_reserved_region(reserved_rec, new_rec->addr(), new_rec->size())) {
+ return false;
}
- } else {
- // uncommit memory inside reserved memory range
- assert(new_vm_rec.committed_size() <= matched_rec->committed_size(),
- "Sanity check");
- matched_rec->uncommit(new_vm_rec.committed_size());
}
- } else if (new_vm_rec.is_type_tagging_record()) {
- // tag this virtual memory range to a memory type
- // can not re-tag a memory range to different type
- assert(FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) == mtNone ||
- FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) == FLAGS_TO_MEMORY_TYPE(new_vm_rec.flags()),
- "Sanity check");
- matched_rec->tag(new_vm_rec.flags());
- } else if (new_vm_rec.is_release_record()) {
- // release part or whole memory range
- if (new_vm_rec.base() == matched_rec->base() &&
- new_vm_rec.size() == matched_rec->size()) {
- // release whole virtual memory range
- assert(matched_rec->committed_size() == 0, "Sanity check");
- vm_snapshot_itr.remove();
- } else {
- // partial release
- matched_rec->partial_release(new_vm_rec.base(), new_vm_rec.size());
+ } else if (new_rec->is_uncommit_record()) {
+ if (!vm_snapshot_itr.remove_uncommitted_region(new_rec)) {
+ return false;
}
- } else {
- // multiple reserve/commit on the same virtual memory range
- assert((new_vm_rec.is_reserve_record() || new_vm_rec.is_commit_record()) &&
- (new_vm_rec.base() == matched_rec->base() && new_vm_rec.size() == matched_rec->size()),
- "Sanity check");
- matched_rec->tag(new_vm_rec.flags());
- }
+ } else if (new_rec->is_commit_record()) {
+ // insert or expand existing committed region to cover this
+ // newly committed region
+ if (!vm_snapshot_itr.add_committed_region(new_rec)) {
+ return false;
+ }
+ } else if (new_rec->is_deallocation_record()) {
+ // release part or all memory region
+ if (!vm_snapshot_itr.remove_released_region(new_rec)) {
+ return false;
+ }
+ } else if (new_rec->is_type_tagging_record()) {
+ // tag this reserved virtual memory range to a memory type. Can not re-tag a memory range
+ // to different type.
+ assert(FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) == mtNone ||
+ FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) == FLAGS_TO_MEMORY_TYPE(new_rec->flags()),
+ "Sanity check");
+ reserved_rec->tag(new_rec->flags());
} else {
- // no matched record
- if (new_vm_rec.is_reserve_record()) {
- if (matched_rec == NULL || matched_rec->base() > new_vm_rec.base()) {
- if (!vm_snapshot_itr.insert(&new_vm_rec)) {
- return false;
+ ShouldNotReachHere();
}
} else {
- if (!vm_snapshot_itr.insert_after(&new_vm_rec)) {
+ /*
+ * The assertion failure indicates mis-matched virtual memory records. The likely
+ * scenario is, that some virtual memory operations are not going through os::xxxx_memory()
+ * api, which have to be tracked manually. (perfMemory is an example).
+ */
+ assert(new_rec->is_allocation_record(), "Sanity check");
+ if (!vm_snapshot_itr.add_reserved_region(new_rec)) {
return false;
}
- }
- } else {
- // throw out obsolete records, which are the commit/uncommit/release/tag records
- // on memory regions that are already released.
- }
}
new_rec = (MemPointerRecord*)itr->next();
}
@@ -433,5 +622,33 @@ void MemSnapshot::check_staging_data() {
cur = (MemPointerRecord*)vm_itr.next();
}
}
+
+// Debugging aid: print every record of the snapshot's virtual memory array
+// to tty, one record per line, as "[start - end] [flags]". Committed
+// regions are indented with a tab. When callsite tracking is on and the
+// record's pc resolves to a function name, the name is appended.
+void MemSnapshot::dump_all_vm_pointers() {
+  MemPointerArrayIteratorImpl itr(_vm_ptrs);
+  VMMemRegion* ptr = (VMMemRegion*)itr.current();
+  tty->print_cr("dump virtual memory pointers:");
+  while (ptr != NULL) {
+    if (ptr->is_committed_region()) {
+      tty->print("\t");
+    }
+    tty->print("[" PTR_FORMAT " - " PTR_FORMAT "] [%x]", ptr->addr(),
+      (ptr->addr() + ptr->size()), ptr->flags());
+
+    if (MemTracker::track_callsite()) {
+      VMMemRegionEx* ex = (VMMemRegionEx*)ptr;
+      if (ex->pc() != NULL) {
+        char buf[1024];
+        if (os::dll_address_to_function_name(ex->pc(), buf, sizeof(buf), NULL)) {
+          tty->print("\t%s", buf);
+        }
+      }
+    }
+    // end the line for every record, including records without a callsite
+    // pc and runs with callsite tracking off, so records never share a line
+    tty->cr();
+
+    ptr = (VMMemRegion*)itr.next();
+  }
+  tty->flush();
+}
#endif // ASSERT
diff --git a/src/share/vm/services/memSnapshot.hpp b/src/share/vm/services/memSnapshot.hpp
index 9ac6e4cf3..dd52f4cd7 100644
--- a/src/share/vm/services/memSnapshot.hpp
+++ b/src/share/vm/services/memSnapshot.hpp
@@ -111,33 +111,41 @@ class VMMemPointerIterator : public MemPointerIterator {
MemPointerIterator(arr) {
}
- // locate an existing record that contains specified address, or
- // the record, where the record with specified address, should
- // be inserted.
- // virtual memory record array is sorted in address order, so
- // binary search is performed
+ // locate an existing reserved memory region that contains specified address,
+ // or the reserved region just above this address, where the incoming
+ // reserved region should be inserted.
virtual MemPointer* locate(address addr) {
- int index_low = 0;
- int index_high = _array->length();
- int index_mid = (index_high + index_low) / 2;
- int r = 1;
- while (index_low < index_high && (r = compare(index_mid, addr)) != 0) {
- if (r > 0) {
- index_high = index_mid;
- } else {
- index_low = index_mid;
+ reset();
+ VMMemRegion* reg = (VMMemRegion*)current();
+ while (reg != NULL) {
+ if (reg->is_reserved_region()) {
+ if (reg->contains_address(addr) || addr < reg->base()) {
+ return reg;
}
- index_mid = (index_high + index_low) / 2;
}
- if (r == 0) {
- // update current location
- _pos = index_mid;
- return _array->at(index_mid);
- } else {
+ reg = (VMMemRegion*)next();
+ }
return NULL;
}
- }
+ // following methods update virtual memory in the context
+ // of 'current' position, which is properly positioned by
+ // callers via locate method.
+ bool add_reserved_region(MemPointerRecord* rec);
+ bool add_committed_region(MemPointerRecord* rec);
+ bool remove_uncommitted_region(MemPointerRecord* rec);
+ bool remove_released_region(MemPointerRecord* rec);
+
+ // split a reserved region to create a new memory region with specified base and size
+ bool split_reserved_region(VMMemRegion* rgn, address new_rgn_addr, size_t new_rgn_size);
+ private:
+ bool insert_record(MemPointerRecord* rec);
+ bool insert_record_after(MemPointerRecord* rec);
+
+ bool insert_reserved_region(MemPointerRecord* rec);
+
+ // reset current position
+ inline void reset() { _pos = 0; }
#ifdef ASSERT
virtual bool is_dup_pointer(const MemPointer* ptr1,
const MemPointer* ptr2) const {
@@ -154,32 +162,17 @@ class VMMemPointerIterator : public MemPointerIterator {
(p1->flags() & MemPointerRecord::tag_masks) == MemPointerRecord::tag_release;
}
#endif
- // compare if an address falls into a memory region,
- // return 0, if the address falls into a memory region at specified index
- // return 1, if memory region pointed by specified index is higher than the address
- // return -1, if memory region pointed by specified index is lower than the address
- int compare(int index, address addr) const {
- VMMemRegion* r = (VMMemRegion*)_array->at(index);
- assert(r->is_reserve_record(), "Sanity check");
- if (r->addr() > addr) {
- return 1;
- } else if (r->addr() + r->reserved_size() <= addr) {
- return -1;
- } else {
- return 0;
- }
- }
};
class MallocRecordIterator : public MemPointerArrayIterator {
- private:
+ protected:
MemPointerArrayIteratorImpl _itr;
public:
MallocRecordIterator(MemPointerArray* arr) : _itr(arr) {
}
- MemPointer* current() const {
+ virtual MemPointer* current() const {
MemPointerRecord* cur = (MemPointerRecord*)_itr.current();
assert(cur == NULL || !cur->is_vm_pointer(), "seek error");
MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next();
@@ -194,7 +187,7 @@ class MallocRecordIterator : public MemPointerArrayIterator {
}
}
- MemPointer* next() {
+ virtual MemPointer* next() {
MemPointerRecord* cur = (MemPointerRecord*)_itr.current();
assert(cur == NULL || !cur->is_vm_pointer(), "Sanity check");
MemPointerRecord* next = (MemPointerRecord*)_itr.next();
@@ -214,6 +207,63 @@ class MallocRecordIterator : public MemPointerArrayIterator {
bool insert_after(MemPointer* ptr) { ShouldNotReachHere(); return false; }
};
+// Iterator that collapses runs of duplicated records (same address, size
+// and flags) into a single record. Within a run, the record with the
+// highest sequence number is the one handed out, because it has more
+// accurate callsite pc.
+// Eliminating duplicates here is much cheaper than during the promotion
+// phase, but it is limited to duplicates within the generation; duplicated
+// records may still show up during promotion.
+class VMRecordIterator : public MallocRecordIterator {
+ public:
+  // Positions the underlying iterator on the last record of the leading
+  // run of duplicates.
+  VMRecordIterator(MemPointerArray* arr) : MallocRecordIterator(arr) {
+    MemPointerRecord* first = (MemPointerRecord*)_itr.current();
+    for (MemPointerRecord* ahead = (MemPointerRecord*)_itr.peek_next();
+         ahead != NULL;
+         ahead = (MemPointerRecord*)_itr.peek_next()) {
+      assert(first != NULL, "Sanity check");
+      assert(((SeqMemPointerRecord*)ahead)->seq() > ((SeqMemPointerRecord*)first)->seq(),
+        "pre-sort order");
+
+      if (!is_duplicated_record(first, ahead)) {
+        break;
+      }
+      _itr.next();
+    }
+  }
+
+  // record at the current position of the underlying iterator
+  virtual MemPointer* current() const {
+    return _itr.current();
+  }
+
+  // get next record, but skip the duplicated records
+  virtual MemPointer* next() {
+    MemPointerRecord* result = (MemPointerRecord*)_itr.next();
+    MemPointerRecord* ahead = (MemPointerRecord*)_itr.peek_next();
+    while (ahead != NULL) {
+      assert(result != NULL, "Sanity check");
+      assert(((SeqMemPointerRecord*)ahead)->seq() > ((SeqMemPointerRecord*)result)->seq(),
+        "pre-sort order");
+
+      if (!is_duplicated_record(result, ahead)) {
+        break;
+      }
+      // 'ahead' duplicates 'result': move onto it and keep looking
+      _itr.next();
+      result = ahead;
+      ahead = (MemPointerRecord*)_itr.peek_next();
+    }
+    return result;
+  }
+
+ private:
+  // two records are duplicates when address, size and flags all match;
+  // thread stack records are asserted never to be duplicated
+  bool is_duplicated_record(MemPointerRecord* p1, MemPointerRecord* p2) const {
+    bool same = p1->addr() == p2->addr();
+    same = same && p1->size() == p2->size();
+    same = same && p1->flags() == p2->flags();
+    assert(!(same && FLAGS_TO_MEMORY_TYPE(p1->flags()) == mtThreadStack), "dup on stack record");
+    return same;
+  }
+};
+
class StagingArea : public _ValueObj {
private:
MemPointerArray* _malloc_data;
@@ -233,7 +283,8 @@ class StagingArea : public _ValueObj {
return MallocRecordIterator(malloc_data());
}
- MemPointerArrayIteratorImpl virtual_memory_record_walker();
+ VMRecordIterator virtual_memory_record_walker();
+
bool init();
void clear() {
assert(_malloc_data != NULL && _vm_data != NULL, "Just check");
@@ -293,6 +344,8 @@ class MemSnapshot : public CHeapObj<mtNMT> {
NOT_PRODUCT(void check_staging_data();)
NOT_PRODUCT(void check_malloc_pointers();)
NOT_PRODUCT(bool has_allocation_record(address addr);)
+ // dump all virtual memory pointers in snapshot
+ DEBUG_ONLY( void dump_all_vm_pointers();)
private:
// copy pointer data from src to dest
@@ -302,5 +355,4 @@ class MemSnapshot : public CHeapObj<mtNMT> {
bool promote_virtual_memory_records(MemPointerArrayIterator* itr);
};
-
#endif // SHARE_VM_SERVICES_MEM_SNAPSHOT_HPP
diff --git a/src/share/vm/services/memTracker.cpp b/src/share/vm/services/memTracker.cpp
index 2af6e10d6..3cec67868 100644
--- a/src/share/vm/services/memTracker.cpp
+++ b/src/share/vm/services/memTracker.cpp
@@ -364,7 +364,7 @@ void MemTracker::create_memory_record(address addr, MEMFLAGS flags,
if (thread != NULL) {
if (thread->is_Java_thread() && ((JavaThread*)thread)->is_safepoint_visible()) {
- JavaThread* java_thread = static_cast<JavaThread*>(thread);
+ JavaThread* java_thread = (JavaThread*)thread;
JavaThreadState state = java_thread->thread_state();
if (SafepointSynchronize::safepoint_safe(java_thread, state)) {
// JavaThreads that are safepoint safe, can run through safepoint,
@@ -472,6 +472,8 @@ void MemTracker::sync() {
// it should guarantee that NMT is fully sync-ed.
ThreadCritical tc;
+ SequenceGenerator::reset();
+
// walk all JavaThreads to collect recorders
SyncThreadRecorderClosure stc;
Threads::threads_do(&stc);
@@ -484,11 +486,12 @@ void MemTracker::sync() {
pending_recorders = _global_recorder;
_global_recorder = NULL;
}
- SequenceGenerator::reset();
// check _worker_thread with lock to avoid racing condition
if (_worker_thread != NULL) {
_worker_thread->at_sync_point(pending_recorders);
}
+
+ assert(SequenceGenerator::peek() == 1, "Should not have memory activities during sync-point");
}
}
diff --git a/src/share/vm/services/memTracker.hpp b/src/share/vm/services/memTracker.hpp
index 62637d8a8..c42f01bf6 100644
--- a/src/share/vm/services/memTracker.hpp
+++ b/src/share/vm/services/memTracker.hpp
@@ -113,8 +113,10 @@ class MemTracker : AllStatic {
#include "thread_solaris.inline.hpp"
#endif
-#ifdef _DEBUG
- #define DEBUG_CALLER_PC os::get_caller_pc(3)
+extern bool NMT_track_callsite;
+
+#ifdef ASSERT
+ #define DEBUG_CALLER_PC (NMT_track_callsite ? os::get_caller_pc(2) : 0)
#else
#define DEBUG_CALLER_PC 0
#endif
@@ -261,7 +263,7 @@ class MemTracker : AllStatic {
// record a 'malloc' call
static inline void record_malloc(address addr, size_t size, MEMFLAGS flags,
address pc = 0, Thread* thread = NULL) {
- if (NMT_CAN_TRACK(flags)) {
+ if (is_on() && NMT_CAN_TRACK(flags)) {
assert(size > 0, "Sanity check");
create_memory_record(addr, (flags|MemPointerRecord::malloc_tag()), size, pc, thread);
}
@@ -275,7 +277,7 @@ class MemTracker : AllStatic {
// record a 'realloc' call
static inline void record_realloc(address old_addr, address new_addr, size_t size,
MEMFLAGS flags, address pc = 0, Thread* thread = NULL) {
- if (is_on()) {
+ if (is_on() && NMT_CAN_TRACK(flags)) {
assert(size > 0, "Sanity check");
record_free(old_addr, flags, thread);
record_malloc(new_addr, size, flags, pc, thread);
@@ -317,6 +319,7 @@ class MemTracker : AllStatic {
static inline void release_thread_stack(address addr, size_t size, Thread* thr) {
if (is_on()) {
assert(size > 0 && thr != NULL, "Sanity check");
+ assert(!thr->is_Java_thread(), "too early");
create_memory_record(addr, MemPointerRecord::virtual_memory_uncommit_tag() | mtThreadStack,
size, DEBUG_CALLER_PC, thr);
create_memory_record(addr, MemPointerRecord::virtual_memory_release_tag() | mtThreadStack,
@@ -326,11 +329,11 @@ class MemTracker : AllStatic {
// record a virtual memory 'commit' call
static inline void record_virtual_memory_commit(address addr, size_t size,
- address pc = 0, Thread* thread = NULL) {
+ address pc, Thread* thread = NULL) {
if (is_on()) {
assert(size > 0, "Sanity check");
create_memory_record(addr, MemPointerRecord::virtual_memory_commit_tag(),
- size, DEBUG_CALLER_PC, thread);
+ size, pc, thread);
}
}
diff --git a/test/compiler/6340864/TestByteVect.java b/test/compiler/6340864/TestByteVect.java
index ec4ba9fcd..5db3687e6 100644
--- a/test/compiler/6340864/TestByteVect.java
+++ b/test/compiler/6340864/TestByteVect.java
@@ -33,7 +33,7 @@
public class TestByteVect {
private static final int ARRLEN = 997;
private static final int ITERS = 11000;
- private static final int ADD_INIT = 0;
+ private static final int ADD_INIT = 63;
private static final int BIT_MASK = 0xB7;
private static final int VALUE = 3;
private static final int SHIFT = 8;
@@ -76,6 +76,7 @@ public class TestByteVect {
test_subc(a0, a1);
test_subv(a0, a1, (byte)VALUE);
test_suba(a0, a1, a2);
+
test_mulc(a0, a1);
test_mulv(a0, a1, (byte)VALUE);
test_mula(a0, a1, a2);
@@ -88,6 +89,7 @@ public class TestByteVect {
test_divc_n(a0, a1);
test_divv(a0, a1, (byte)-VALUE);
test_diva(a0, a1, a3);
+
test_andc(a0, a1);
test_andv(a0, a1, (byte)BIT_MASK);
test_anda(a0, a1, a4);
@@ -97,30 +99,49 @@ public class TestByteVect {
test_xorc(a0, a1);
test_xorv(a0, a1, (byte)BIT_MASK);
test_xora(a0, a1, a4);
+
test_sllc(a0, a1);
test_sllv(a0, a1, VALUE);
test_srlc(a0, a1);
test_srlv(a0, a1, VALUE);
test_srac(a0, a1);
test_srav(a0, a1, VALUE);
+
test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE);
+
test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT);
+
test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
+
test_pack2(p2, a1);
test_unpack2(a0, p2);
test_pack2_swap(p2, a1);
@@ -369,6 +390,60 @@ public class TestByteVect {
errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
}
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
test_pack2(p2, a1);
for (int i=0; i<ARRLEN/2; i++) {
errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
@@ -805,6 +880,84 @@ public class TestByteVect {
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
test_pack2(p2, a1);
}
end = System.currentTimeMillis();
@@ -1036,6 +1189,26 @@ public class TestByteVect {
a0[i] = (byte)(a1[i]<<b);
}
}
+ static void test_sllc_add(byte[] a0, byte[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) << VALUE, narrowed to byte, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE); // add first, then shift left; the (byte) cast keeps only the low 8 bits
+ }
+ }
+ static void test_sllv_add(byte[] a0, byte[] a1, int b) { // Fills a0[i] with (a1[i] + b) << VALUE, narrowed to byte; the addend is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] + b)<<VALUE); // a1[i] + b is an int sum; it is shifted left, then truncated to 8 bits
+ }
+ }
+ static void test_sllc_and(byte[] a0, byte[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) << VALUE, narrowed to byte, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE); // mask first, then shift left; the (byte) cast keeps only the low 8 bits
+ }
+ }
+ static void test_sllv_and(byte[] a0, byte[] a1, int b) { // Fills a0[i] with (a1[i] & b) << VALUE, narrowed to byte; the mask is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] & b)<<VALUE); // a1[i] is sign-extended to int before the AND; the result is shifted left, then truncated
+ }
+ }
static void test_srlc(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -1062,6 +1235,26 @@ public class TestByteVect {
a0[i] = (byte)(a1[i]>>>b);
}
}
+ static void test_srlc_add(byte[] a0, byte[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) >>> VALUE, narrowed to byte, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE); // the sum is at least int wide, so >>> inserts zeros at the top of that wider value, not at bit 7; the cast then truncates
+ }
+ }
+ static void test_srlv_add(byte[] a0, byte[] a1, int b) { // Fills a0[i] with (a1[i] + b) >>> VALUE, narrowed to byte; the addend is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] + b)>>>VALUE); // >>> acts on the int sum (zeros enter at bit 31); the (byte) cast keeps the low 8 bits of the shifted int
+ }
+ }
+ static void test_srlc_and(byte[] a0, byte[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) >>> VALUE, narrowed to byte, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE); // mask first; >>> then acts on the promoted (at least int) value before the cast truncates
+ }
+ }
+ static void test_srlv_and(byte[] a0, byte[] a1, int b) { // Fills a0[i] with (a1[i] & b) >>> VALUE, narrowed to byte; the mask is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] & b)>>>VALUE); // the AND yields an int; >>> shifts that int with zero fill, then the cast keeps the low 8 bits
+ }
+ }
static void test_srac(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -1088,6 +1281,26 @@ public class TestByteVect {
a0[i] = (byte)(a1[i]>>b);
}
}
+ static void test_srac_add(byte[] a0, byte[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) >> VALUE, narrowed to byte, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE); // >> is the sign-propagating right shift, applied to the promoted sum before the (byte) cast
+ }
+ }
+ static void test_srav_add(byte[] a0, byte[] a1, int b) { // Fills a0[i] with (a1[i] + b) >> VALUE, narrowed to byte; the addend is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] + b)>>VALUE); // sign-propagating shift of the int sum, then truncation to 8 bits
+ }
+ }
+ static void test_srac_and(byte[] a0, byte[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) >> VALUE, narrowed to byte, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE); // mask first, then sign-propagating right shift; the cast keeps the low 8 bits
+ }
+ }
+ static void test_srav_and(byte[] a0, byte[] a1, int b) { // Fills a0[i] with (a1[i] & b) >> VALUE, narrowed to byte; the mask is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (byte)((a1[i] & b)>>VALUE); // the AND yields an int; it is shifted right with sign fill, then truncated to 8 bits
+ }
+ }
static void test_pack2(short[] p2, byte[] a1) {
if (p2.length*2 > a1.length) return;
diff --git a/test/compiler/6340864/TestIntVect.java b/test/compiler/6340864/TestIntVect.java
index 36e277f73..5866b34a5 100644
--- a/test/compiler/6340864/TestIntVect.java
+++ b/test/compiler/6340864/TestIntVect.java
@@ -74,6 +74,7 @@ public class TestIntVect {
test_subc(a0, a1);
test_subv(a0, a1, (int)VALUE);
test_suba(a0, a1, a2);
+
test_mulc(a0, a1);
test_mulv(a0, a1, (int)VALUE);
test_mula(a0, a1, a2);
@@ -86,6 +87,7 @@ public class TestIntVect {
test_divc_n(a0, a1);
test_divv(a0, a1, (int)-VALUE);
test_diva(a0, a1, a3);
+
test_andc(a0, a1);
test_andv(a0, a1, (int)BIT_MASK);
test_anda(a0, a1, a4);
@@ -95,30 +97,49 @@ public class TestIntVect {
test_xorc(a0, a1);
test_xorv(a0, a1, (int)BIT_MASK);
test_xora(a0, a1, a4);
+
test_sllc(a0, a1);
test_sllv(a0, a1, VALUE);
test_srlc(a0, a1);
test_srlv(a0, a1, VALUE);
test_srac(a0, a1);
test_srav(a0, a1, VALUE);
+
test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE);
+
test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT);
+
test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
+
test_pack2(p2, a1);
test_unpack2(a0, p2);
test_pack2_swap(p2, a1);
@@ -359,6 +380,60 @@ public class TestIntVect {
errn += verify("test_srav_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT)));
}
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
test_pack2(p2, a1);
for (int i=0; i<ARRLEN/2; i++) {
errn += verify("test_pack2: ", i, p2[i], ((long)(ADD_INIT+2*i) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i+1) << 32));
@@ -727,6 +802,84 @@ public class TestIntVect {
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
test_pack2(p2, a1);
}
end = System.currentTimeMillis();
@@ -908,6 +1061,26 @@ public class TestIntVect {
a0[i] = (int)(a1[i]<<b);
}
}
+ static void test_sllc_add(int[] a0, int[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) << VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] + ADD_INIT)<<VALUE); // add first, then shift left by VALUE
+ }
+ }
+ static void test_sllv_add(int[] a0, int[] a1, int b) { // Fills a0[i] with (a1[i] + b) << VALUE; the addend is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] + b)<<VALUE); // the shifted int sum is already an int, so the (int) cast changes nothing
+ }
+ }
+ static void test_sllc_and(int[] a0, int[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) << VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] & BIT_MASK)<<VALUE); // mask first, then shift left by VALUE
+ }
+ }
+ static void test_sllv_and(int[] a0, int[] a1, int b) { // Fills a0[i] with (a1[i] & b) << VALUE; the mask is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] & b)<<VALUE); // the shifted int AND is already an int, so the (int) cast changes nothing
+ }
+ }
static void test_srlc(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -934,6 +1107,26 @@ public class TestIntVect {
a0[i] = (int)(a1[i]>>>b);
}
}
+ static void test_srlc_add(int[] a0, int[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) >>> VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] + ADD_INIT)>>>VALUE); // add first, then zero-filling (unsigned) right shift
+ }
+ }
+ static void test_srlv_add(int[] a0, int[] a1, int b) { // Fills a0[i] with (a1[i] + b) >>> VALUE; the addend is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] + b)>>>VALUE); // zero-filling right shift of the int sum; the (int) cast changes nothing
+ }
+ }
+ static void test_srlc_and(int[] a0, int[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) >>> VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] & BIT_MASK)>>>VALUE); // mask first, then zero-filling (unsigned) right shift
+ }
+ }
+ static void test_srlv_and(int[] a0, int[] a1, int b) { // Fills a0[i] with (a1[i] & b) >>> VALUE; the mask is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] & b)>>>VALUE); // zero-filling right shift of the int AND; the (int) cast changes nothing
+ }
+ }
static void test_srac(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -960,6 +1153,26 @@ public class TestIntVect {
a0[i] = (int)(a1[i]>>b);
}
}
+ static void test_srac_add(int[] a0, int[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) >> VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] + ADD_INIT)>>VALUE); // add first, then sign-propagating (arithmetic) right shift
+ }
+ }
+ static void test_srav_add(int[] a0, int[] a1, int b) { // Fills a0[i] with (a1[i] + b) >> VALUE; the addend is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] + b)>>VALUE); // sign-propagating right shift of the int sum; the (int) cast changes nothing
+ }
+ }
+ static void test_srac_and(int[] a0, int[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) >> VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] & BIT_MASK)>>VALUE); // mask first, then sign-propagating (arithmetic) right shift
+ }
+ }
+ static void test_srav_and(int[] a0, int[] a1, int b) { // Fills a0[i] with (a1[i] & b) >> VALUE; the mask is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (int)((a1[i] & b)>>VALUE); // sign-propagating right shift of the int AND; the (int) cast changes nothing
+ }
+ }
static void test_pack2(long[] p2, int[] a1) {
if (p2.length*2 > a1.length) return;
diff --git a/test/compiler/6340864/TestLongVect.java b/test/compiler/6340864/TestLongVect.java
index 70b41f4b4..436a8472d 100644
--- a/test/compiler/6340864/TestLongVect.java
+++ b/test/compiler/6340864/TestLongVect.java
@@ -73,6 +73,7 @@ public class TestLongVect {
test_subc(a0, a1);
test_subv(a0, a1, (long)VALUE);
test_suba(a0, a1, a2);
+
test_mulc(a0, a1);
test_mulv(a0, a1, (long)VALUE);
test_mula(a0, a1, a2);
@@ -85,6 +86,7 @@ public class TestLongVect {
test_divc_n(a0, a1);
test_divv(a0, a1, (long)-VALUE);
test_diva(a0, a1, a3);
+
test_andc(a0, a1);
test_andv(a0, a1, (long)BIT_MASK);
test_anda(a0, a1, a4);
@@ -94,30 +96,48 @@ public class TestLongVect {
test_xorc(a0, a1);
test_xorv(a0, a1, (long)BIT_MASK);
test_xora(a0, a1, a4);
+
test_sllc(a0, a1);
test_sllv(a0, a1, VALUE);
test_srlc(a0, a1);
test_srlv(a0, a1, VALUE);
test_srac(a0, a1);
test_srav(a0, a1, VALUE);
+
test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE);
+
test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT);
+
test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
}
// Test and verify results
System.out.println("Verification");
@@ -354,6 +374,60 @@ public class TestLongVect {
errn += verify("test_srav_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT)));
}
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
}
if (errn > 0)
@@ -696,6 +770,84 @@ public class TestLongVect {
end = System.currentTimeMillis();
System.out.println("test_srav_on: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
return errn;
}
@@ -854,6 +1006,26 @@ public class TestLongVect {
a0[i] = (long)(a1[i]<<b);
}
}
+ static void test_sllc_add(long[] a0, long[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) << VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] + ADD_INIT)<<VALUE); // a1[i] is long, so the sum and the shift are 64-bit; the (long) cast changes nothing
+ }
+ }
+ static void test_sllv_add(long[] a0, long[] a1, long b) { // Fills a0[i] with (a1[i] + b) << VALUE; the addend is the long argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] + b)<<VALUE); // 64-bit add, then 64-bit left shift; the (long) cast changes nothing
+ }
+ }
+ static void test_sllc_and(long[] a0, long[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) << VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] & BIT_MASK)<<VALUE); // a1[i] is long, so the AND and the shift are 64-bit; the (long) cast changes nothing
+ }
+ }
+ static void test_sllv_and(long[] a0, long[] a1, long b) { // Fills a0[i] with (a1[i] & b) << VALUE; the mask is the long argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] & b)<<VALUE); // 64-bit AND, then 64-bit left shift; the (long) cast changes nothing
+ }
+ }
static void test_srlc(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -880,6 +1052,26 @@ public class TestLongVect {
a0[i] = (long)(a1[i]>>>b);
}
}
+ static void test_srlc_add(long[] a0, long[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) >>> VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] + ADD_INIT)>>>VALUE); // 64-bit add, then zero-filling (unsigned) right shift; the (long) cast changes nothing
+ }
+ }
+ static void test_srlv_add(long[] a0, long[] a1, long b) { // Fills a0[i] with (a1[i] + b) >>> VALUE; the addend is the long argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] + b)>>>VALUE); // 64-bit add, then zero-filling right shift; the (long) cast changes nothing
+ }
+ }
+ static void test_srlc_and(long[] a0, long[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) >>> VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] & BIT_MASK)>>>VALUE); // 64-bit AND, then zero-filling (unsigned) right shift; the (long) cast changes nothing
+ }
+ }
+ static void test_srlv_and(long[] a0, long[] a1, long b) { // Fills a0[i] with (a1[i] & b) >>> VALUE; the mask is the long argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] & b)>>>VALUE); // 64-bit AND, then zero-filling right shift; the (long) cast changes nothing
+ }
+ }
static void test_srac(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -906,6 +1098,26 @@ public class TestLongVect {
a0[i] = (long)(a1[i]>>b);
}
}
+ static void test_srac_add(long[] a0, long[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) >> VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] + ADD_INIT)>>VALUE); // 64-bit add, then sign-propagating (arithmetic) right shift; the (long) cast changes nothing
+ }
+ }
+ static void test_srav_add(long[] a0, long[] a1, long b) { // Fills a0[i] with (a1[i] + b) >> VALUE; the addend is the long argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] + b)>>VALUE); // 64-bit add, then sign-propagating right shift; the (long) cast changes nothing
+ }
+ }
+ static void test_srac_and(long[] a0, long[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) >> VALUE for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] & BIT_MASK)>>VALUE); // 64-bit AND, then sign-propagating (arithmetic) right shift; the (long) cast changes nothing
+ }
+ }
+ static void test_srav_and(long[] a0, long[] a1, long b) { // Fills a0[i] with (a1[i] & b) >> VALUE; the mask is the long argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (long)((a1[i] & b)>>VALUE); // 64-bit AND, then sign-propagating right shift; the (long) cast changes nothing
+ }
+ }
static int verify(String text, int i, long elem, long val) {
if (elem != val) {
diff --git a/test/compiler/6340864/TestShortVect.java b/test/compiler/6340864/TestShortVect.java
index a688e0de0..9f59c8f22 100644
--- a/test/compiler/6340864/TestShortVect.java
+++ b/test/compiler/6340864/TestShortVect.java
@@ -75,6 +75,7 @@ public class TestShortVect {
test_subc(a0, a1);
test_subv(a0, a1, (short)VALUE);
test_suba(a0, a1, a2);
+
test_mulc(a0, a1);
test_mulv(a0, a1, (short)VALUE);
test_mula(a0, a1, a2);
@@ -87,6 +88,7 @@ public class TestShortVect {
test_divc_n(a0, a1);
test_divv(a0, a1, (short)-VALUE);
test_diva(a0, a1, a3);
+
test_andc(a0, a1);
test_andv(a0, a1, (short)BIT_MASK);
test_anda(a0, a1, a4);
@@ -96,30 +98,49 @@ public class TestShortVect {
test_xorc(a0, a1);
test_xorv(a0, a1, (short)BIT_MASK);
test_xora(a0, a1, a4);
+
test_sllc(a0, a1);
test_sllv(a0, a1, VALUE);
test_srlc(a0, a1);
test_srlv(a0, a1, VALUE);
test_srac(a0, a1);
test_srav(a0, a1, VALUE);
+
test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE);
+
test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT);
+
test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
+
test_pack2(p2, a1);
test_unpack2(a0, p2);
test_pack2_swap(p2, a1);
@@ -364,6 +385,60 @@ public class TestShortVect {
errn += verify("test_srav_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT)));
}
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
test_pack2(p2, a1);
for (int i=0; i<ARRLEN/2; i++) {
errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16));
@@ -762,6 +837,84 @@ public class TestShortVect {
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
test_pack2(p2, a1);
}
end = System.currentTimeMillis();
@@ -968,6 +1121,26 @@ public class TestShortVect {
a0[i] = (short)(a1[i]<<b);
}
}
+ static void test_sllc_add(short[] a0, short[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) << VALUE, narrowed to short, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] + ADD_INIT)<<VALUE); // add first, then shift left; the (short) cast keeps only the low 16 bits
+ }
+ }
+ static void test_sllv_add(short[] a0, short[] a1, int b) { // Fills a0[i] with (a1[i] + b) << VALUE, narrowed to short; the addend is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] + b)<<VALUE); // a1[i] + b is an int sum; it is shifted left, then truncated to 16 bits
+ }
+ }
+ static void test_sllc_and(short[] a0, short[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) << VALUE, narrowed to short, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] & BIT_MASK)<<VALUE); // mask first, then shift left; the (short) cast keeps only the low 16 bits
+ }
+ }
+ static void test_sllv_and(short[] a0, short[] a1, int b) { // Fills a0[i] with (a1[i] & b) << VALUE, narrowed to short; the mask is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] & b)<<VALUE); // a1[i] is sign-extended to int before the AND; the result is shifted left, then truncated
+ }
+ }
static void test_srlc(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -994,6 +1167,26 @@ public class TestShortVect {
a0[i] = (short)(a1[i]>>>b);
}
}
+ static void test_srlc_add(short[] a0, short[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) >>> VALUE, narrowed to short, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] + ADD_INIT)>>>VALUE); // the sum is at least int wide, so >>> inserts zeros at the top of that wider value, not at bit 15; the cast then truncates
+ }
+ }
+ static void test_srlv_add(short[] a0, short[] a1, int b) { // Fills a0[i] with (a1[i] + b) >>> VALUE, narrowed to short; the addend is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] + b)>>>VALUE); // >>> acts on the int sum (zeros enter at bit 31); the (short) cast keeps the low 16 bits of the shifted int
+ }
+ }
+ static void test_srlc_and(short[] a0, short[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) >>> VALUE, narrowed to short, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] & BIT_MASK)>>>VALUE); // mask first; >>> then acts on the promoted (at least int) value before the cast truncates
+ }
+ }
+ static void test_srlv_and(short[] a0, short[] a1, int b) { // Fills a0[i] with (a1[i] & b) >>> VALUE, narrowed to short; the mask is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] & b)>>>VALUE); // the AND yields an int; >>> shifts that int with zero fill, then the cast keeps the low 16 bits
+ }
+ }
static void test_srac(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -1020,6 +1213,26 @@ public class TestShortVect {
a0[i] = (short)(a1[i]>>b);
}
}
+ static void test_srac_add(short[] a0, short[] a1) { // Fills a0[i] with (a1[i] + ADD_INIT) >> VALUE, narrowed to short, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] + ADD_INIT)>>VALUE); // >> is the sign-propagating right shift, applied to the promoted sum before the (short) cast
+ }
+ }
+ static void test_srav_add(short[] a0, short[] a1, int b) { // Fills a0[i] with (a1[i] + b) >> VALUE, narrowed to short; the addend is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] + b)>>VALUE); // sign-propagating shift of the int sum, then truncation to 16 bits
+ }
+ }
+ static void test_srac_and(short[] a0, short[] a1) { // Fills a0[i] with (a1[i] & BIT_MASK) >> VALUE, narrowed to short, for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] & BIT_MASK)>>VALUE); // mask first, then sign-propagating right shift; the cast keeps the low 16 bits
+ }
+ }
+ static void test_srav_and(short[] a0, short[] a1, int b) { // Fills a0[i] with (a1[i] & b) >> VALUE, narrowed to short; the mask is the argument b.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read at the same indices
+ a0[i] = (short)((a1[i] & b)>>VALUE); // the AND yields an int; it is shifted right with sign fill, then truncated to 16 bits
+ }
+ }
static void test_pack2(int[] p2, short[] a1) {
if (p2.length*2 > a1.length) return;
diff --git a/test/compiler/7184394/TestAESBase.java b/test/compiler/7184394/TestAESBase.java
new file mode 100644
index 000000000..ad6c835cc
--- /dev/null
+++ b/test/compiler/7184394/TestAESBase.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @author Tom Deneau
+ */
+
+import javax.crypto.Cipher;
+import javax.crypto.KeyGenerator;
+import javax.crypto.SecretKey;
+import javax.crypto.spec.IvParameterSpec;
+import javax.crypto.spec.SecretKeySpec;
+import java.security.AlgorithmParameters;
+
+import java.util.Random;
+import java.util.Arrays;
+
+abstract public class TestAESBase { // state and helpers shared by the AES encode/decode test classes
+  int msgSize = Integer.getInteger("msgSize", 646);          // plaintext length in bytes (-DmsgSize)
+  boolean checkOutput = Boolean.getBoolean("checkOutput");   // when true, prepare() keeps reference outputs and dumps the arrays
+  boolean noReinit = Boolean.getBoolean("noReinit");         // printed by prepare() as reInit=!noReinit
+  int keySize = Integer.getInteger("keySize", 128);          // key size in bits; 0 selects a 16-byte key
+  String algorithm = System.getProperty("algorithm", "AES");
+  String mode = System.getProperty("mode", "CBC");
+  byte[] input;
+  byte[] encode;
+  byte[] expectedEncode;
+  byte[] decode;
+  byte[] expectedDecode;
+  Random random = new Random(0);   // fixed seed
+  Cipher cipher;                   // initialised for ENCRYPT_MODE in prepare()
+  Cipher dCipher;                  // initialised for DECRYPT_MODE in prepare()
+  String paddingStr = "PKCS5Padding";
+  AlgorithmParameters algParams;
+  SecretKey key;
+  int ivLen;
+
+  static int numThreads = 0;
+  int threadId;
+  static synchronized int getThreadId() { // returns the current counter value, then increments it
+    int id = numThreads;
+    numThreads++;
+    return id;
+  }
+
+  abstract public void run();
+
+  public void prepare() { // builds key, input and both ciphers, then performs one encrypt/decrypt round trip
+    try {
+      System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput);
+
+      int keyLenBytes = (keySize == 0 ? 16 : keySize/8);
+      byte keyBytes[] = new byte[keyLenBytes];
+      if (keySize == 128)
+        keyBytes = new byte[] {-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7};
+      else
+        random.nextBytes(keyBytes);
+
+      key = new SecretKeySpec(keyBytes, algorithm);
+      if (threadId == 0) {
+        System.out.println("Algorithm: " + key.getAlgorithm() + "("
+                           + key.getEncoded().length * 8 + "bit)");
+      }
+      input = new byte[msgSize];
+      for (int i=0; i<input.length; i++) {
+        input[i] = (byte) (i & 0xff);
+      }
+
+      cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
+      dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
+
+      ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
+      IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
+
+      cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
+      algParams = cipher.getParameters();
+      dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
+      if (threadId == 0) {
+        childShowCipher();
+      }
+
+      // do one encode and decode in preparation
+      // this will also create the encode buffer and decode buffer
+      encode = cipher.doFinal(input);
+      decode = dCipher.doFinal(encode);
+      if (checkOutput) {
+        expectedEncode = (byte[]) encode.clone();
+        expectedDecode = (byte[]) decode.clone();
+        showArray(key.getEncoded() , "key: ");
+        showArray(input, "input: ");
+        showArray(encode, "encode: ");
+        showArray(decode, "decode: ");
+      }
+    }
+    catch (Exception e) {
+      e.printStackTrace();
+      System.exit(1);
+    }
+  }
+
+  void showArray(byte b[], String name) { // prints the name, the length and at most the first 32 bytes in hex
+    System.out.format("%s [%d]: ", name, b.length);
+    for (int i=0; i<Math.min(b.length, 32); i++) {
+      System.out.format("%02x ", b[i] & 0xff);
+    }
+    System.out.println();
+  }
+
+  void compareArrays(byte b[], byte exp[]) { // on a length or content mismatch: report, dump both arrays, exit with status 1
+    if (b.length != exp.length) {
+      System.out.format("different lengths for actual and expected output arrays\n");
+      showArray(b, "test: ");
+      showArray(exp, "exp : ");
+      System.exit(1);
+    }
+    for (int i=0; i< exp.length; i++) {
+      if (b[i] != exp[i]) {
+        System.out.format("output error at index %d: got %02x, expected %02x\n", i, b[i] & 0xff, exp[i] & 0xff);
+        showArray(b, "test: ");
+        showArray(exp, "exp : ");
+        System.exit(1);
+      }
+    }
+  }
+
+  // Prints provider and algorithm of the cipher passed in as c, which need not be the 'cipher' field.
+  void showCipher(Cipher c, String kind) {
+    System.out.println(kind + " cipher provider: " + c.getProvider());
+    System.out.println(kind + " cipher algorithm: " + c.getAlgorithm());
+  }
+
+  abstract void childShowCipher();
+}
diff --git a/test/compiler/7184394/TestAESDecode.java b/test/compiler/7184394/TestAESDecode.java
new file mode 100644
index 000000000..f9ec02d15
--- /dev/null
+++ b/test/compiler/7184394/TestAESDecode.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @author Tom Deneau
+ */
+
+import javax.crypto.Cipher;
+
+public class TestAESDecode extends TestAESBase { // one decryption pass per run() over the ciphertext held in 'encode'
+  @Override
+  public void run() {
+    try {
+      if (!noReinit) {
+        dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
+      }
+      if (!checkOutput) {
+        // unchecked path: allocate a buffer of getOutputSize() bytes and decrypt straight into it
+        decode = new byte[dCipher.getOutputSize(encode.length)];
+        dCipher.doFinal(encode, 0, encode.length, decode);
+        return;
+      }
+      // checked path: doFinal returns a new array, which is compared with the reference plaintext
+      decode = dCipher.doFinal(encode, 0, encode.length);
+      compareArrays(decode, expectedDecode);
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.exit(1);
+    }
+  }
+
+  @Override
+  void childShowCipher() {
+    showCipher(dCipher, "Decryption");
+  }
+}
diff --git a/test/compiler/7184394/TestAESEncode.java b/test/compiler/7184394/TestAESEncode.java
new file mode 100644
index 000000000..1d6bf7fbd
--- /dev/null
+++ b/test/compiler/7184394/TestAESEncode.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @author Tom Deneau
+ */
+
+import javax.crypto.Cipher;
+
+public class TestAESEncode extends TestAESBase { // one encryption pass per run() over the first msgSize bytes of 'input'
+  @Override
+  public void run() {
+    try {
+      if (!noReinit) {
+        cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
+      }
+      if (!checkOutput) {
+        // unchecked path: allocate a buffer of getOutputSize() bytes and encrypt straight into it
+        encode = new byte[cipher.getOutputSize(msgSize)];
+        cipher.doFinal(input, 0, msgSize, encode);
+        return;
+      }
+      // checked path: doFinal returns a new array, which is compared with the reference ciphertext
+      encode = cipher.doFinal(input, 0, msgSize);
+      compareArrays(encode, expectedEncode);
+    } catch (Exception e) {
+      e.printStackTrace();
+      System.exit(1);
+    }
+  }
+
+  @Override
+  void childShowCipher() {
+    showCipher(cipher, "Encryption");
+  }
+}
diff --git a/test/compiler/7184394/TestAESMain.java b/test/compiler/7184394/TestAESMain.java
new file mode 100644
index 000000000..ca2cb43dc
--- /dev/null
+++ b/test/compiler/7184394/TestAESMain.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7184394
+ * @summary add intrinsics to use AES instructions
+ *
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain
+ *
+ * @author Tom Deneau
+ */
+
+public class TestAESMain { // driver: times repeated TestAESEncode.run() calls, then repeated TestAESDecode.run() calls
+  public static void main(String[] args) {
+    int iters = (args.length > 0 ? Integer.parseInt(args[0]) : 1000000); // optional first argument overrides the iteration count
+    System.out.println(iters + " iterations");
+    TestAESEncode etest = new TestAESEncode();
+    etest.prepare();
+    long start = System.nanoTime();
+    for (int i=0; i<iters; i++) {
+      etest.run();
+    }
+    long end = System.nanoTime(); // nanosecond delta is divided by 1e6 below so the printed value matches its "ms" label
+    System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000.0) + " ms");
+
+    TestAESDecode dtest = new TestAESDecode();
+    dtest.prepare();
+    start = System.nanoTime();
+    for (int i=0; i<iters; i++) {
+      dtest.run();
+    }
+    end = System.nanoTime(); // same nanoseconds-to-milliseconds conversion as for the encode timing
+    System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000.0) + " ms");
+  }
+}
diff --git a/test/compiler/8000805/Test8000805.java b/test/compiler/8000805/Test8000805.java
new file mode 100644
index 000000000..bd0b7b41c
--- /dev/null
+++ b/test/compiler/8000805/Test8000805.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8000805
+ * @summary JMM issue: short loads are non-atomic
+ *
+ * @run main/othervm -server -XX:-TieredCompilation -Xcomp -XX:+PrintCompilation -XX:CompileOnly=Test8000805.loadS2LmaskFF,Test8000805.loadS2Lmask16,Test8000805.loadS2Lmask13,Test8000805.loadUS_signExt,Test8000805.loadB2L_mask8 Test8000805
+ */
+
+public class Test8000805 { // each kernel has a twin with suffix _1 and an identical body; main() checks that the two agree
+  static long loadS2LmaskFF   (short[] sa) { return sa[0] & 0xFF; }
+  static long loadS2LmaskFF_1 (short[] sa) { return sa[0] & 0xFF; }
+
+  static long loadS2Lmask16   (short[] sa) { return sa[0] & 0xFFFE; }
+  static long loadS2Lmask16_1 (short[] sa) { return sa[0] & 0xFFFE; }
+
+  static long loadS2Lmask13   (short[] sa) { return sa[0] & 0x0FFF; }
+  static long loadS2Lmask13_1 (short[] sa) { return sa[0] & 0x0FFF; }
+
+  static int loadUS_signExt   (char[] ca) { return (ca[0] << 16) >> 16; }
+  static int loadUS_signExt_1 (char[] ca) { return (ca[0] << 16) >> 16; }
+
+  static long loadB2L_mask8   (byte[] ba) { return ba[0] & 0x55; }
+  static long loadB2L_mask8_1 (byte[] ba) { return ba[0] & 0x55; }
+
+  public static void main(String[] args) {
+    for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) { // inclusive bound so Byte.MAX_VALUE is tested too; i is an int, so the loop terminates
+      byte[] ba = new byte[] { (byte) i};
+
+      { long v1 = loadB2L_mask8(ba);
+        long v2 = loadB2L_mask8_1(ba);
+        if (v1 != v2)
+          throw new InternalError(String.format("loadB2L_mask8 failed: %x != %x", v1, v2)); }
+    }
+
+    for (int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; i++) { // inclusive bound so Short.MAX_VALUE (and char 0x7FFF) is tested too
+      short[] sa = new short[] { (short)i };
+      char[] ca = new char[] { (char)i };
+
+      { long v1 = loadS2LmaskFF(sa);
+        long v2 = loadS2LmaskFF_1(sa);
+        if (v1 != v2)
+          throw new InternalError(String.format("loadS2LmaskFF failed: %x != %x", v1, v2)); }
+
+      { long v1 = loadS2Lmask16(sa);
+        long v2 = loadS2Lmask16_1(sa);
+        if (v1 != v2)
+          throw new InternalError(String.format("loadS2Lmask16 failed: %x != %x", v1, v2)); }
+
+      { long v1 = loadS2Lmask13(sa);
+        long v2 = loadS2Lmask13_1(sa);
+        if (v1 != v2)
+          throw new InternalError(String.format("loadS2Lmask13 failed: %x != %x", v1, v2)); }
+
+      { int v1 = loadUS_signExt(ca);
+        int v2 = loadUS_signExt_1(ca);
+        if (v1 != v2)
+          throw new InternalError(String.format("loadUS_signExt failed: %x != %x", v1, v2)); }
+    }
+
+    System.out.println("TEST PASSED.");
+  }
+}
diff --git a/test/compiler/8001183/TestCharVect.java b/test/compiler/8001183/TestCharVect.java
new file mode 100644
index 000000000..a6ff1e2b9
--- /dev/null
+++ b/test/compiler/8001183/TestCharVect.java
@@ -0,0 +1,1332 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8001183
+ * @summary incorrect results of char vectors right shift operation
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestCharVect
+ */
+
+public class TestCharVect {
+ private static final int ARRLEN = 997;
+ private static final int ITERS = 11000;
+ private static final int ADD_INIT = Character.MAX_VALUE-500;
+ private static final int BIT_MASK = 0xB731;
+ private static final int VALUE = 7;
+ private static final int SHIFT = 16;
+
+ // Entry point: runs test() and prints PASSED, or FAILED with the error count.
+ public static void main(String args[]) {
+ System.out.println("Testing Char vectors");
+ final int failures = test();
+ if (failures <= 0) { System.out.println("PASSED"); return; }
+ // A positive error count ends the JVM with exit status 97.
+ System.err.println("FAILED: " + failures + " errors");
+ System.exit(97);
+ }
+
+ static int test() {
+ char[] a0 = new char[ARRLEN];
+ char[] a1 = new char[ARRLEN];
+ short[] a2 = new short[ARRLEN];
+ short[] a3 = new short[ARRLEN];
+ short[] a4 = new short[ARRLEN];
+ int[] p2 = new int[ARRLEN/2];
+ long[] p4 = new long[ARRLEN/4];
+ // Initialize
+ int gold_sum = 0;
+ for (int i=0; i<ARRLEN; i++) {
+ char val = (char)(ADD_INIT+i);
+ gold_sum += val;
+ a1[i] = val;
+ a2[i] = VALUE;
+ a3[i] = -VALUE;
+ a4[i] = (short)BIT_MASK;
+ }
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) {
+ test_sum(a1);
+ test_addc(a0, a1);
+ test_addv(a0, a1, (char)VALUE);
+ test_adda(a0, a1, a2);
+ test_subc(a0, a1);
+ test_subv(a0, a1, (char)VALUE);
+ test_suba(a0, a1, a2);
+
+ test_mulc(a0, a1);
+ test_mulv(a0, a1, (char)VALUE);
+ test_mula(a0, a1, a2);
+ test_divc(a0, a1);
+ test_divv(a0, a1, VALUE);
+ test_diva(a0, a1, a2);
+ test_mulc_n(a0, a1);
+ test_mulv(a0, a1, (char)-VALUE);
+ test_mula(a0, a1, a3);
+ test_divc_n(a0, a1);
+ test_divv(a0, a1, -VALUE);
+ test_diva(a0, a1, a3);
+
+ test_andc(a0, a1);
+ test_andv(a0, a1, (short)BIT_MASK);
+ test_anda(a0, a1, a4);
+ test_orc(a0, a1);
+ test_orv(a0, a1, (short)BIT_MASK);
+ test_ora(a0, a1, a4);
+ test_xorc(a0, a1);
+ test_xorv(a0, a1, (short)BIT_MASK);
+ test_xora(a0, a1, a4);
+
+ test_sllc(a0, a1);
+ test_sllv(a0, a1, VALUE);
+ test_srlc(a0, a1);
+ test_srlv(a0, a1, VALUE);
+ test_srac(a0, a1);
+ test_srav(a0, a1, VALUE);
+
+ test_sllc_n(a0, a1);
+ test_sllv(a0, a1, -VALUE);
+ test_srlc_n(a0, a1);
+ test_srlv(a0, a1, -VALUE);
+ test_srac_n(a0, a1);
+ test_srav(a0, a1, -VALUE);
+
+ test_sllc_o(a0, a1);
+ test_sllv(a0, a1, SHIFT);
+ test_srlc_o(a0, a1);
+ test_srlv(a0, a1, SHIFT);
+ test_srac_o(a0, a1);
+ test_srav(a0, a1, SHIFT);
+
+ test_sllc_on(a0, a1);
+ test_sllv(a0, a1, -SHIFT);
+ test_srlc_on(a0, a1);
+ test_srlv(a0, a1, -SHIFT);
+ test_srac_on(a0, a1);
+ test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
+
+ test_pack2(p2, a1);
+ test_unpack2(a0, p2);
+ test_pack2_swap(p2, a1);
+ test_unpack2_swap(a0, p2);
+ test_pack4(p4, a1);
+ test_unpack4(a0, p4);
+ test_pack4_swap(p4, a1);
+ test_unpack4_swap(a0, p4);
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ int sum = test_sum(a1);
+ if (sum != gold_sum) {
+ System.err.println("test_sum: " + sum + " != " + gold_sum);
+ errn++;
+ }
+
+ test_addc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_addc: ", i, a0[i], (char)((char)(ADD_INIT+i)+VALUE));
+ }
+ test_addv(a0, a1, (char)VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_addv: ", i, a0[i], (char)((char)(ADD_INIT+i)+VALUE));
+ }
+ test_adda(a0, a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_adda: ", i, a0[i], (char)((char)(ADD_INIT+i)+VALUE));
+ }
+
+ test_subc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_subc: ", i, a0[i], (char)((char)(ADD_INIT+i)-VALUE));
+ }
+ test_subv(a0, a1, (char)VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_subv: ", i, a0[i], (char)((char)(ADD_INIT+i)-VALUE));
+ }
+ test_suba(a0, a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_suba: ", i, a0[i], (char)((char)(ADD_INIT+i)-VALUE));
+ }
+
+ test_mulc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mulc: ", i, a0[i], (char)((char)(ADD_INIT+i)*VALUE));
+ }
+ test_mulv(a0, a1, (char)VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mulv: ", i, a0[i], (char)((char)(ADD_INIT+i)*VALUE));
+ }
+ test_mula(a0, a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mula: ", i, a0[i], (char)((char)(ADD_INIT+i)*VALUE));
+ }
+
+ test_divc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_divc: ", i, a0[i], (char)((char)(ADD_INIT+i)/VALUE));
+ }
+ test_divv(a0, a1, VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_divv: ", i, a0[i], (char)((char)(ADD_INIT+i)/VALUE));
+ }
+ test_diva(a0, a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_diva: ", i, a0[i], (char)((char)(ADD_INIT+i)/VALUE));
+ }
+
+ test_mulc_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mulc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)*(-VALUE)));
+ }
+ test_mulv(a0, a1, (char)-VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mulv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)*(-VALUE)));
+ }
+ test_mula(a0, a1, a3);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mula_n: ", i, a0[i], (char)((char)(ADD_INIT+i)*(-VALUE)));
+ }
+
+ test_divc_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_divc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)/(-VALUE)));
+ }
+ test_divv(a0, a1, -VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_divv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)/(-VALUE)));
+ }
+ test_diva(a0, a1, a3);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_diva_n: ", i, a0[i], (char)((char)(ADD_INIT+i)/(-VALUE)));
+ }
+
+ test_andc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_andc: ", i, a0[i], (char)((char)(ADD_INIT+i)&BIT_MASK));
+ }
+ test_andv(a0, a1, (short)BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_andv: ", i, a0[i], (char)((char)(ADD_INIT+i)&BIT_MASK));
+ }
+ test_anda(a0, a1, a4);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_anda: ", i, a0[i], (char)((char)(ADD_INIT+i)&BIT_MASK));
+ }
+
+ test_orc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_orc: ", i, a0[i], (char)((char)(ADD_INIT+i)|BIT_MASK));
+ }
+ test_orv(a0, a1, (short)BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_orv: ", i, a0[i], (char)((char)(ADD_INIT+i)|BIT_MASK));
+ }
+ test_ora(a0, a1, a4);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ora: ", i, a0[i], (char)((char)(ADD_INIT+i)|BIT_MASK));
+ }
+
+ test_xorc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_xorc: ", i, a0[i], (char)((char)(ADD_INIT+i)^BIT_MASK));
+ }
+ test_xorv(a0, a1, (short)BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_xorv: ", i, a0[i], (char)((char)(ADD_INIT+i)^BIT_MASK));
+ }
+ test_xora(a0, a1, a4);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_xora: ", i, a0[i], (char)((char)(ADD_INIT+i)^BIT_MASK));
+ }
+
+ test_sllc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc: ", i, a0[i], (char)((char)(ADD_INIT+i)<<VALUE));
+ }
+ test_sllv(a0, a1, VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv: ", i, a0[i], (char)((char)(ADD_INIT+i)<<VALUE));
+ }
+
+ test_srlc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>VALUE));
+ }
+ test_srlv(a0, a1, VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>VALUE));
+ }
+
+ test_srac(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac: ", i, a0[i], (char)((char)(ADD_INIT+i)>>VALUE));
+ }
+ test_srav(a0, a1, VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav: ", i, a0[i], (char)((char)(ADD_INIT+i)>>VALUE));
+ }
+
+ test_sllc_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-VALUE)));
+ }
+ test_sllv(a0, a1, -VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-VALUE)));
+ }
+
+ test_srlc_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-VALUE)));
+ }
+ test_srlv(a0, a1, -VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-VALUE)));
+ }
+
+ test_srac_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-VALUE)));
+ }
+ test_srav(a0, a1, -VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-VALUE)));
+ }
+
+ test_sllc_o(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_o: ", i, a0[i], (char)((char)(ADD_INIT+i)<<SHIFT));
+ }
+ test_sllv(a0, a1, SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_o: ", i, a0[i], (char)((char)(ADD_INIT+i)<<SHIFT));
+ }
+
+ test_srlc_o(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>SHIFT));
+ }
+ test_srlv(a0, a1, SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>SHIFT));
+ }
+
+ test_srac_o(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>SHIFT));
+ }
+ test_srav(a0, a1, SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>SHIFT));
+ }
+
+ test_sllc_on(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_on: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-SHIFT)));
+ }
+ test_sllv(a0, a1, -SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_on: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-SHIFT)));
+ }
+
+ test_srlc_on(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-SHIFT)));
+ }
+ test_srlv(a0, a1, -SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-SHIFT)));
+ }
+
+ test_srac_on(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-SHIFT)));
+ }
+ test_srav(a0, a1, -SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-SHIFT)));
+ }
+
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
+ test_pack2(p2, a1);
+ for (int i=0; i<ARRLEN/2; i++) {
+ errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16));
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a0[i] = (char)-1;
+ }
+ test_unpack2(a0, p2);
+ for (int i=0; i<(ARRLEN&(-2)); i++) {
+ errn += verify("test_unpack2: ", i, a0[i], (char)(ADD_INIT+i));
+ }
+
+ test_pack2_swap(p2, a1);
+ for (int i=0; i<ARRLEN/2; i++) {
+ errn += verify("test_pack2_swap: ", i, p2[i], ((int)(ADD_INIT+2*i+1) & 0xFFFF) | ((int)(ADD_INIT+2*i) << 16));
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a0[i] = (char)-1;
+ }
+ test_unpack2_swap(a0, p2);
+ for (int i=0; i<(ARRLEN&(-2)); i++) {
+ errn += verify("test_unpack2_swap: ", i, a0[i], (char)(ADD_INIT+i));
+ }
+
+ test_pack4(p4, a1);
+ for (int i=0; i<ARRLEN/4; i++) {
+ errn += verify("test_pack4: ", i, p4[i], ((long)(ADD_INIT+4*i+0) & 0xFFFFl) |
+ (((long)(ADD_INIT+4*i+1) & 0xFFFFl) << 16) |
+ (((long)(ADD_INIT+4*i+2) & 0xFFFFl) << 32) |
+ (((long)(ADD_INIT+4*i+3) & 0xFFFFl) << 48));
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a0[i] = (char)-1;
+ }
+ test_unpack4(a0, p4);
+ for (int i=0; i<(ARRLEN&(-4)); i++) {
+ errn += verify("test_unpack4: ", i, a0[i], (char)(ADD_INIT+i));
+ }
+
+ test_pack4_swap(p4, a1);
+ for (int i=0; i<ARRLEN/4; i++) {
+ errn += verify("test_pack4_swap: ", i, p4[i], ((long)(ADD_INIT+4*i+3) & 0xFFFFl) |
+ (((long)(ADD_INIT+4*i+2) & 0xFFFFl) << 16) |
+ (((long)(ADD_INIT+4*i+1) & 0xFFFFl) << 32) |
+ (((long)(ADD_INIT+4*i+0) & 0xFFFFl) << 48));
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a0[i] = (char)-1;
+ }
+ test_unpack4_swap(a0, p4);
+ for (int i=0; i<(ARRLEN&(-4)); i++) {
+ errn += verify("test_unpack4_swap: ", i, a0[i], (char)(ADD_INIT+i));
+ }
+
+ }
+
+ if (errn > 0)
+ return errn;
+
+ System.out.println("Time");
+ long start, end;
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sum(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sum: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_addc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_addc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_addv(a0, a1, (char)VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_addv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_adda(a0, a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_adda: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_subc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_subc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_subv(a0, a1, (char)VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_subv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_suba(a0, a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_suba: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mulc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mulc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mulv(a0, a1, (char)VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mulv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mula(a0, a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mula: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_divc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_divc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_divv(a0, a1, VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_divv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_diva(a0, a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_diva: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mulc_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mulc_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mulv(a0, a1, (char)-VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mulv_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mula(a0, a1, a3);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mula_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_divc_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_divc_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_divv(a0, a1, -VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_divv_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_diva(a0, a1, a3);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_diva_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_andc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_andc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_andv(a0, a1, (short)BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_andv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_anda(a0, a1, a4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_anda: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_orc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_orc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_orv(a0, a1, (short)BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_orv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ora(a0, a1, a4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ora: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_xorc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_xorc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_xorv(a0, a1, (short)BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_xorv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_xora(a0, a1, a4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_xora: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv(a0, a1, VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv(a0, a1, VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav(a0, a1, VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv(a0, a1, -VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv(a0, a1, -VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav(a0, a1, -VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_o(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_o: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv(a0, a1, SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_o: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_o(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_o: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv(a0, a1, SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_o: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_o(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_o: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav(a0, a1, SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_o: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_on(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_on: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv(a0, a1, -SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_on: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_on(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_on: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv(a0, a1, -SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_on: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_on(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_on: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav(a0, a1, -SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_on: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_pack2(p2, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_pack2: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_unpack2(a0, p2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_unpack2: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_pack2_swap(p2, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_pack2_swap: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_unpack2_swap(a0, p2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_unpack2_swap: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_pack4(p4, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_pack4: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_unpack4(a0, p4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_unpack4: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_pack4_swap(p4, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_pack4_swap: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_unpack4_swap(a0, p4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_unpack4_swap: " + (end - start));
+
+ return errn;
+ }
+
+ static int test_sum(char[] a1) { // Returns the sum of all elements of a1, accumulated in an int.
+ int sum = 0;
+ for (int i = 0; i < a1.length; i+=1) {
+ sum += a1[i]; // each char is widened to int (0..65535) before being added
+ }
+ return sum;
+ }
+
+ static void test_addc(char[] a0, char[] a1) { // Stores a1[i] + VALUE (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // bound is a0.length, so a1 must be at least as long as a0
+ a0[i] = (char)(a1[i]+VALUE); // the (char) cast keeps only the low 16 bits of the sum
+ }
+ }
+ static void test_addv(char[] a0, char[] a1, char b) { // Stores a1[i] + b into a0[i] for every index of a0; b is a loop-invariant argument.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]+b); // both chars are widened to int; the cast keeps the low 16 bits
+ }
+ }
+ static void test_adda(char[] a0, char[] a1, short[] a2) { // Stores a1[i] + a2[i] into a0[i] for every index of a0 (element-wise add of a char and a short array).
+ for (int i = 0; i < a0.length; i+=1) { // a1 and a2 are indexed up to a0.length - 1
+ a0[i] = (char)(a1[i]+a2[i]); // char is zero-extended, short is sign-extended to int before the add
+ }
+ }
+
+ static void test_subc(char[] a0, char[] a1) { // Stores a1[i] - VALUE (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]-VALUE); // a negative int difference wraps when narrowed to char
+ }
+ }
+ static void test_subv(char[] a0, char[] a1, char b) { // Stores a1[i] - b into a0[i] for every index of a0; b is a loop-invariant argument.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]-b); // subtraction is done in int; the cast keeps the low 16 bits
+ }
+ }
+ static void test_suba(char[] a0, char[] a1, short[] a2) { // Stores a1[i] - a2[i] into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // a1 and a2 are indexed up to a0.length - 1
+ a0[i] = (char)(a1[i]-a2[i]); // short operand is sign-extended, so a negative a2[i] increases the result
+ }
+ }
+
+ static void test_mulc(char[] a0, char[] a1) { // Stores a1[i] * VALUE (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]*VALUE); // only the low 16 bits of the int product are kept
+ }
+ }
+ static void test_mulc_n(char[] a0, char[] a1) { // Same as test_mulc but multiplies by the negated constant, -VALUE.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]*(-VALUE)); // only the low 16 bits of the int product are kept
+ }
+ }
+ static void test_mulv(char[] a0, char[] a1, char b) { // Stores a1[i] * b into a0[i] for every index of a0; b is a loop-invariant argument.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]*b); // product is computed in int and narrowed to its low 16 bits
+ }
+ }
+ static void test_mula(char[] a0, char[] a1, short[] a2) { // Stores a1[i] * a2[i] into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // a1 and a2 are indexed up to a0.length - 1
+ a0[i] = (char)(a1[i]*a2[i]); // a2[i] is sign-extended, so negative multipliers are possible
+ }
+ }
+
+ static void test_divc(char[] a0, char[] a1) { // Stores a1[i] / VALUE (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]/VALUE); // integer division; the quotient is narrowed to char
+ }
+ }
+ static void test_divc_n(char[] a0, char[] a1) { // Same as test_divc but divides by the negated constant, -VALUE.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]/(-VALUE)); // integer division; a negative quotient wraps when narrowed to char
+ }
+ }
+ static void test_divv(char[] a0, char[] a1, int b) { // Stores a1[i] / b into a0[i] for every index of a0; b is a loop-invariant int divisor.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]/b); // integer division: throws ArithmeticException when b is 0
+ }
+ }
+ static void test_diva(char[] a0, char[] a1, short[] a2) { // Stores a1[i] / a2[i] into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // a1 and a2 are indexed up to a0.length - 1
+ a0[i] = (char)(a1[i]/a2[i]); // integer division: a zero element in a2 throws ArithmeticException
+ }
+ }
+
+ static void test_andc(char[] a0, char[] a1) { // Stores a1[i] & BIT_MASK (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]&BIT_MASK); // bitwise AND in int, narrowed back to char
+ }
+ }
+ static void test_andv(char[] a0, char[] a1, short b) { // Stores a1[i] & b into a0[i] for every index of a0; b is a loop-invariant short mask.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]&b); // b is sign-extended to int; the cast keeps the low 16 bits
+ }
+ }
+ static void test_anda(char[] a0, char[] a1, short[] a2) { // Stores a1[i] & a2[i] into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // a1 and a2 are indexed up to a0.length - 1
+ a0[i] = (char)(a1[i]&a2[i]); // bitwise AND in int, narrowed back to char
+ }
+ }
+
+ static void test_orc(char[] a0, char[] a1) { // Stores a1[i] | BIT_MASK (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]|BIT_MASK); // bitwise OR in int; bits above bit 15 are dropped by the cast
+ }
+ }
+ static void test_orv(char[] a0, char[] a1, short b) { // Stores a1[i] | b into a0[i] for every index of a0; b is a loop-invariant short mask.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]|b); // b is sign-extended to int; the cast drops everything above bit 15
+ }
+ }
+ static void test_ora(char[] a0, char[] a1, short[] a2) { // Stores a1[i] | a2[i] into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // a1 and a2 are indexed up to a0.length - 1
+ a0[i] = (char)(a1[i]|a2[i]); // bitwise OR in int, narrowed back to char
+ }
+ }
+
+ static void test_xorc(char[] a0, char[] a1) { // Stores a1[i] ^ BIT_MASK (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]^BIT_MASK); // bitwise XOR in int; bits above bit 15 are dropped by the cast
+ }
+ }
+ static void test_xorv(char[] a0, char[] a1, short b) { // Stores a1[i] ^ b into a0[i] for every index of a0; b is a loop-invariant short mask.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]^b); // b is sign-extended to int; the cast drops everything above bit 15
+ }
+ }
+ static void test_xora(char[] a0, char[] a1, short[] a2) { // Stores a1[i] ^ a2[i] into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) { // a1 and a2 are indexed up to a0.length - 1
+ a0[i] = (char)(a1[i]^a2[i]); // bitwise XOR in int, narrowed back to char
+ }
+ }
+
+ static void test_sllc(char[] a0, char[] a1) { // Stores a1[i] << VALUE (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]<<VALUE); // int shift (distance taken from the low 5 bits); cast keeps the low 16 bits
+ }
+ }
+ static void test_sllc_n(char[] a0, char[] a1) { // Same as test_sllc but the shift distance is the negated constant, -VALUE.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]<<(-VALUE)); // an int shift uses only the low 5 bits of the (negative) distance
+ }
+ }
+ static void test_sllc_o(char[] a0, char[] a1) { // Same as test_sllc but shifts by the class constant SHIFT instead of VALUE.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]<<SHIFT); // NOTE(review): "_o" presumably means an over-sized shift distance; SHIFT is defined outside this view
+ }
+ }
+ static void test_sllc_on(char[] a0, char[] a1) { // Same as test_sllc_o but the shift distance is the negated constant, -SHIFT.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]<<(-SHIFT)); // an int shift uses only the low 5 bits of the (negative) distance
+ }
+ }
+ static void test_sllv(char[] a0, char[] a1, int b) { // Stores a1[i] << b into a0[i] for every index of a0; b is a loop-invariant shift distance.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]<<b); // int shift: only the low 5 bits of b are used, so negative or large b is legal
+ }
+ }
+ static void test_sllc_add(char[] a0, char[] a1) { // Stores (a1[i] + ADD_INIT) << VALUE into a0[i] for every index of a0; both operands are class constants.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] + ADD_INIT)<<VALUE); // the add happens in int before the shift; cast keeps the low 16 bits
+ }
+ }
+ static void test_sllv_add(char[] a0, char[] a1, int b) { // Stores (a1[i] + b) << VALUE into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] + b)<<VALUE); // note: b is the addend; the shift distance is still the constant VALUE
+ }
+ }
+ static void test_sllc_and(char[] a0, char[] a1) { // Stores (a1[i] & BIT_MASK) << VALUE into a0[i] for every index of a0; both operands are class constants.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] & BIT_MASK)<<VALUE); // mask first, then shift; cast keeps the low 16 bits
+ }
+ }
+ static void test_sllv_and(char[] a0, char[] a1, int b) { // Stores (a1[i] & b) << VALUE into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] & b)<<VALUE); // note: b is the mask; the shift distance is still the constant VALUE
+ }
+ }
+
+ static void test_srlc(char[] a0, char[] a1) { // Stores a1[i] >>> VALUE (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>>VALUE); // unsigned int shift of the zero-extended char (distance from the low 5 bits)
+ }
+ }
+ static void test_srlc_n(char[] a0, char[] a1) { // Same as test_srlc but the shift distance is the negated constant, -VALUE.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>>(-VALUE)); // an int shift uses only the low 5 bits of the (negative) distance
+ }
+ }
+ static void test_srlc_o(char[] a0, char[] a1) { // Same as test_srlc but shifts by the class constant SHIFT instead of VALUE.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>>SHIFT); // NOTE(review): "_o" presumably means an over-sized shift distance; SHIFT is defined outside this view
+ }
+ }
+ static void test_srlc_on(char[] a0, char[] a1) { // Same as test_srlc_o but the shift distance is the negated constant, -SHIFT.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>>(-SHIFT)); // an int shift uses only the low 5 bits of the (negative) distance
+ }
+ }
+ static void test_srlv(char[] a0, char[] a1, int b) { // Stores a1[i] >>> b into a0[i] for every index of a0; b is a loop-invariant shift distance.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>>b); // int shift: only the low 5 bits of b are used, so negative or large b is legal
+ }
+ }
+ static void test_srlc_add(char[] a0, char[] a1) { // Stores (a1[i] + ADD_INIT) >>> VALUE into a0[i] for every index of a0; both operands are class constants.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] + ADD_INIT)>>>VALUE); // the int sum may exceed 16 bits, so bits above bit 15 can shift down into the result
+ }
+ }
+ static void test_srlv_add(char[] a0, char[] a1, int b) { // Stores (a1[i] + b) >>> VALUE into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] + b)>>>VALUE); // note: b is the addend; the shift distance is still the constant VALUE
+ }
+ }
+ static void test_srlc_and(char[] a0, char[] a1) { // Stores (a1[i] & BIT_MASK) >>> VALUE into a0[i] for every index of a0; both operands are class constants.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] & BIT_MASK)>>>VALUE); // mask first, then unsigned shift; cast keeps the low 16 bits
+ }
+ }
+ static void test_srlv_and(char[] a0, char[] a1, int b) { // Stores (a1[i] & b) >>> VALUE into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] & b)>>>VALUE); // note: b is the mask; the shift distance is still the constant VALUE
+ }
+ }
+
+ static void test_srac(char[] a0, char[] a1) { // Stores a1[i] >> VALUE (class constant) into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>VALUE); // a char widens to a non-negative int, so the signed shift fills with zeros here
+ }
+ }
+ static void test_srac_n(char[] a0, char[] a1) { // Same as test_srac but the shift distance is the negated constant, -VALUE.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>(-VALUE)); // an int shift uses only the low 5 bits of the (negative) distance
+ }
+ }
+ static void test_srac_o(char[] a0, char[] a1) { // Same as test_srac but shifts by the class constant SHIFT instead of VALUE.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>SHIFT); // NOTE(review): "_o" presumably means an over-sized shift distance; SHIFT is defined outside this view
+ }
+ }
+ static void test_srac_on(char[] a0, char[] a1) { // Same as test_srac_o but the shift distance is the negated constant, -SHIFT.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>(-SHIFT)); // an int shift uses only the low 5 bits of the (negative) distance
+ }
+ }
+ static void test_srav(char[] a0, char[] a1, int b) { // Stores a1[i] >> b into a0[i] for every index of a0; b is a loop-invariant shift distance.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]>>b); // int shift: only the low 5 bits of b are used; the widened char is never negative
+ }
+ }
+ static void test_srac_add(char[] a0, char[] a1) { // Stores (a1[i] + ADD_INIT) >> VALUE into a0[i] for every index of a0; both operands are class constants.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] + ADD_INIT)>>VALUE); // signed shift of the int sum; cast keeps the low 16 bits
+ }
+ }
+ static void test_srav_add(char[] a0, char[] a1, int b) { // Stores (a1[i] + b) >> VALUE into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] + b)>>VALUE); // note: b is the addend; the shift distance is still the constant VALUE
+ }
+ }
+ static void test_srac_and(char[] a0, char[] a1) { // Stores (a1[i] & BIT_MASK) >> VALUE into a0[i] for every index of a0; both operands are class constants.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] & BIT_MASK)>>VALUE); // masking a widened char cannot set the sign bit, so the shift fills with zeros
+ }
+ }
+ static void test_srav_and(char[] a0, char[] a1, int b) { // Stores (a1[i] & b) >> VALUE into a0[i] for every index of a0.
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)((a1[i] & b)>>VALUE); // note: b is the mask; the shift distance is still the constant VALUE
+ }
+ }
+
+ static void test_pack2(int[] p2, char[] a1) { // Packs each pair of chars from a1 into one int of p2: even-index char in the low half, odd-index char in the high half.
+ if (p2.length*2 > a1.length) return; // silently do nothing when a1 has fewer than two chars per p2 slot
+ for (int i = 0; i < p2.length; i+=1) {
+ int l0 = (int)a1[i*2+0]; // first char of the pair
+ int l1 = (int)a1[i*2+1]; // second char of the pair
+ p2[i] = (l1 << 16) | (l0 & 0xFFFF); // l1 -> bits 31..16, l0 -> bits 15..0
+ }
+ }
+ static void test_unpack2(char[] a0, int[] p2) { // Inverse of the test_pack2 layout: splits each int of p2 into two chars of a0, low half first.
+ if (p2.length*2 > a0.length) return; // silently do nothing when a0 cannot hold two chars per p2 slot
+ for (int i = 0; i < p2.length; i+=1) {
+ int l = p2[i];
+ a0[i*2+0] = (char)(l & 0xFFFF); // bits 15..0
+ a0[i*2+1] = (char)(l >> 16); // bits 31..16; sign-extension bits are discarded by the (char) cast
+ }
+ }
+ static void test_pack2_swap(int[] p2, char[] a1) { // Like test_pack2 with the halves swapped: even-index char in the high half, odd-index char in the low half.
+ if (p2.length*2 > a1.length) return; // silently do nothing when a1 has fewer than two chars per p2 slot
+ for (int i = 0; i < p2.length; i+=1) {
+ int l0 = (int)a1[i*2+0]; // first char of the pair
+ int l1 = (int)a1[i*2+1]; // second char of the pair
+ p2[i] = (l0 << 16) | (l1 & 0xFFFF); // l0 -> bits 31..16, l1 -> bits 15..0
+ }
+ }
+ static void test_unpack2_swap(char[] a0, int[] p2) { // Inverse of the test_pack2_swap layout: splits each int of p2 into two chars of a0, high half first.
+ if (p2.length*2 > a0.length) return; // silently do nothing when a0 cannot hold two chars per p2 slot
+ for (int i = 0; i < p2.length; i+=1) {
+ int l = p2[i];
+ a0[i*2+0] = (char)(l >> 16); // bits 31..16; sign-extension bits are discarded by the (char) cast
+ a0[i*2+1] = (char)(l & 0xFFFF); // bits 15..0
+ }
+ }
+
+ static void test_pack4(long[] p4, char[] a1) { // Packs each group of four chars from a1 into one long of p4, first char in the lowest 16 bits.
+ if (p4.length*4 > a1.length) return; // silently do nothing when a1 has fewer than four chars per p4 slot
+ for (int i = 0; i < p4.length; i+=1) {
+ long l0 = (long)a1[i*4+0];
+ long l1 = (long)a1[i*4+1];
+ long l2 = (long)a1[i*4+2];
+ long l3 = (long)a1[i*4+3];
+ p4[i] = (l0 & 0xFFFFl) | // bits 15..0
+ ((l1 & 0xFFFFl) << 16) | // bits 31..16
+ ((l2 & 0xFFFFl) << 32) | // bits 47..32
+ ((l3 & 0xFFFFl) << 48); // bits 63..48
+ }
+ }
+ static void test_unpack4(char[] a0, long[] p4) { // Inverse of the test_pack4 layout: splits each long of p4 into four chars of a0, lowest 16 bits first.
+ if (p4.length*4 > a0.length) return; // silently do nothing when a0 cannot hold four chars per p4 slot
+ for (int i = 0; i < p4.length; i+=1) {
+ long l = p4[i];
+ a0[i*4+0] = (char)(l & 0xFFFFl); // bits 15..0
+ a0[i*4+1] = (char)(l >> 16); // bits 31..16 (the cast keeps only the low 16 bits of the shifted value)
+ a0[i*4+2] = (char)(l >> 32); // bits 47..32
+ a0[i*4+3] = (char)(l >> 48); // bits 63..48; sign-extension bits are discarded by the cast
+ }
+ }
+ static void test_pack4_swap(long[] p4, char[] a1) { // Like test_pack4 with the order reversed: first char of each group goes into the highest 16 bits.
+ if (p4.length*4 > a1.length) return; // silently do nothing when a1 has fewer than four chars per p4 slot
+ for (int i = 0; i < p4.length; i+=1) {
+ long l0 = (long)a1[i*4+0];
+ long l1 = (long)a1[i*4+1];
+ long l2 = (long)a1[i*4+2];
+ long l3 = (long)a1[i*4+3];
+ p4[i] = (l3 & 0xFFFFl) | // bits 15..0
+ ((l2 & 0xFFFFl) << 16) | // bits 31..16
+ ((l1 & 0xFFFFl) << 32) | // bits 47..32
+ ((l0 & 0xFFFFl) << 48); // bits 63..48
+ }
+ }
+ static void test_unpack4_swap(char[] a0, long[] p4) { // Inverse of the test_pack4_swap layout: splits each long of p4 into four chars of a0, highest 16 bits first.
+ if (p4.length*4 > a0.length) return; // silently do nothing when a0 cannot hold four chars per p4 slot
+ for (int i = 0; i < p4.length; i+=1) {
+ long l = p4[i];
+ a0[i*4+0] = (char)(l >> 48); // bits 63..48; sign-extension bits are discarded by the cast
+ a0[i*4+1] = (char)(l >> 32); // bits 47..32 (the cast keeps only the low 16 bits of the shifted value)
+ a0[i*4+2] = (char)(l >> 16); // bits 31..16
+ a0[i*4+3] = (char)(l & 0xFFFFl); // bits 15..0
+ }
+ }
+
+ static int verify(String text, int i, int elem, int val) { // Compares an actual int value (elem) with the expected one (val); returns 1 on mismatch, 0 on match.
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // report: label, index, actual and expected in decimal
+ return 1; // callers add this to their error count
+ }
+ return 0;
+ }
+
+ static int verify(String text, int i, long elem, long val) { // Long overload: compares an actual long (elem) with the expected one (val); returns 1 on mismatch, 0 on match.
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); // values are printed in hex
+ return 1; // callers add this to their error count
+ }
+ return 0;
+ }
+}