about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .hgtags 1
-rw-r--r-- make/hotspot_version 2
-rw-r--r-- src/cpu/ppc/vm/cppInterpreter_ppc.cpp 11
-rw-r--r-- src/cpu/ppc/vm/frame_ppc.inline.hpp 2
-rw-r--r-- src/cpu/ppc/vm/interp_masm_ppc_64.hpp 2
-rw-r--r-- src/cpu/ppc/vm/interpreterRT_ppc.cpp 1
-rw-r--r-- src/cpu/ppc/vm/interpreter_ppc.cpp 18
-rw-r--r-- src/cpu/ppc/vm/jniFastGetField_ppc.cpp 6
-rw-r--r-- src/cpu/ppc/vm/ppc.ad 158
-rw-r--r-- src/cpu/ppc/vm/templateInterpreter_ppc.cpp 38
-rw-r--r-- src/cpu/ppc/vm/templateTable_ppc_64.cpp 18
-rw-r--r-- src/cpu/sparc/vm/assembler_sparc.hpp 32
-rw-r--r-- src/cpu/sparc/vm/stubGenerator_sparc.cpp 697
-rw-r--r-- src/cpu/sparc/vm/stubRoutines_sparc.hpp 2
-rw-r--r-- src/cpu/sparc/vm/vm_version_sparc.cpp 8
-rw-r--r-- src/cpu/x86/vm/assembler_x86.cpp 6
-rw-r--r-- src/cpu/x86/vm/vm_version_x86.cpp 6
-rw-r--r-- src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp 70
-rw-r--r-- src/share/vm/ci/ciReplay.cpp 16
-rw-r--r-- src/share/vm/classfile/vmSymbols.hpp 2
-rw-r--r-- src/share/vm/code/nmethod.cpp 6
-rw-r--r-- src/share/vm/oops/klass.cpp 1
-rw-r--r-- src/share/vm/opto/compile.cpp 5
-rw-r--r-- src/share/vm/opto/compile.hpp 3
-rw-r--r-- src/share/vm/opto/loopnode.cpp 9
-rw-r--r-- src/share/vm/opto/memnode.cpp 27
-rw-r--r-- src/share/vm/opto/node.cpp 24
-rw-r--r-- src/share/vm/opto/runtime.cpp 4
-rw-r--r-- src/share/vm/runtime/advancedThresholdPolicy.cpp 3
-rw-r--r-- src/share/vm/runtime/arguments.cpp 4
-rw-r--r-- src/share/vm/runtime/compilationPolicy.cpp 1
-rw-r--r-- src/share/vm/runtime/sharedRuntime.cpp 15
-rw-r--r-- src/share/vm/runtime/simpleThresholdPolicy.cpp 7
-rw-r--r-- test/compiler/7184394/TestAESBase.java 54
-rw-r--r-- test/compiler/7184394/TestAESDecode.java 15
-rw-r--r-- test/compiler/7184394/TestAESEncode.java 15
-rw-r--r-- test/compiler/7184394/TestAESMain.java 23
37 files changed, 978 insertions, 334 deletions
diff --git a/.hgtags b/.hgtags
index d4fce46da..124bfd805 100644
--- a/.hgtags
+++ b/.hgtags
@@ -462,3 +462,4 @@ b6a2ba7d3ea7259a76c8ff1ec22fac9094494c1c hs25.20-b11
3c291bc2aa7c58efb1219701f38c41731609e595 hs25.20-b12
18ae0dac7620474547aa1721bc3fd748af07b8b5 jdk8u20-b12
47951595af60460a479b8574622375bfbf5c8ed2 jdk8u20-b13
+798f5b02be897151fdad44d695446088b1cca6b1 hs25.20-b13
diff --git a/make/hotspot_version b/make/hotspot_version
index 0559e053f..7497f5ea5 100644
--- a/make/hotspot_version
+++ b/make/hotspot_version
@@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2014
HS_MAJOR_VER=25
HS_MINOR_VER=20
-HS_BUILD_NUMBER=12
+HS_BUILD_NUMBER=14
JDK_MAJOR_VER=1
JDK_MINOR_VER=8
diff --git a/src/cpu/ppc/vm/cppInterpreter_ppc.cpp b/src/cpu/ppc/vm/cppInterpreter_ppc.cpp
index 5cf5a6a65..2ba4c55c6 100644
--- a/src/cpu/ppc/vm/cppInterpreter_ppc.cpp
+++ b/src/cpu/ppc/vm/cppInterpreter_ppc.cpp
@@ -1,3 +1,4 @@
+
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
@@ -403,7 +404,7 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(Label& stack_ov
BLOCK_COMMENT("compute_interpreter_state {");
// access_flags = method->access_flags();
- // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size");
+ // TODO: PPC port: assert(4 == sizeof(AccessFlags), "unexpected field size");
__ lwa(access_flags, method_(access_flags));
// parameter_count = method->constMethod->size_of_parameters();
@@ -1055,7 +1056,7 @@ address CppInterpreterGenerator::generate_native_entry(void) {
assert(access_flags->is_nonvolatile(),
"access_flags must be in a non-volatile register");
// Type check.
- // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size");
+ // TODO: PPC port: assert(4 == sizeof(AccessFlags), "unexpected field size");
__ lwz(access_flags, method_(access_flags));
// We don't want to reload R19_method and access_flags after calls
@@ -1838,7 +1839,7 @@ address CppInterpreterGenerator::generate_normal_entry(void) {
// Interpreter state fields.
const Register msg = R24_tmp4;
- // MethodOop fields.
+ // Method fields.
const Register parameter_count = R25_tmp5;
const Register result_index = R26_tmp6;
@@ -2023,7 +2024,7 @@ address CppInterpreterGenerator::generate_normal_entry(void) {
__ add(R17_tos, R17_tos, parameter_count);
// Result stub address array index
- // TODO: PPC port: assert(4 == methodOopDesc::sz_result_index(), "unexpected field size");
+ // TODO: PPC port: assert(4 == sizeof(AccessFlags), "unexpected field size");
__ lwa(result_index, method_(result_index));
__ li(msg, BytecodeInterpreter::method_resume);
@@ -2709,7 +2710,7 @@ address CppInterpreterGenerator::generate_normal_entry(void) {
__ ld(R3_ARG1, state_(_result._osr._osr_buf));
__ mtctr(R12_scratch2);
- // Load method oop, gc may move it during execution of osr'd method.
+ // Load method, gc may move it during execution of osr'd method.
__ ld(R22_tmp2, state_(_method));
// Load message 'call_method'.
__ li(R23_tmp3, BytecodeInterpreter::call_method);
diff --git a/src/cpu/ppc/vm/frame_ppc.inline.hpp b/src/cpu/ppc/vm/frame_ppc.inline.hpp
index 8e46363ea..6186906a8 100644
--- a/src/cpu/ppc/vm/frame_ppc.inline.hpp
+++ b/src/cpu/ppc/vm/frame_ppc.inline.hpp
@@ -26,6 +26,8 @@
#ifndef CPU_PPC_VM_FRAME_PPC_INLINE_HPP
#define CPU_PPC_VM_FRAME_PPC_INLINE_HPP
+#include "code/codeCache.hpp"
+
// Inline functions for ppc64 frames:
// Find codeblob and set deopt_state.
diff --git a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp
index 9846bb511..b4c95a645 100644
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp
@@ -26,7 +26,7 @@
#ifndef CPU_PPC_VM_INTERP_MASM_PPC_64_HPP
#define CPU_PPC_VM_INTERP_MASM_PPC_64_HPP
-#include "assembler_ppc.inline.hpp"
+#include "asm/macroAssembler.hpp"
#include "interpreter/invocationCounter.hpp"
// This file specializes the assembler with interpreter-specific macros.
diff --git a/src/cpu/ppc/vm/interpreterRT_ppc.cpp b/src/cpu/ppc/vm/interpreterRT_ppc.cpp
index 5d45d8bf8..98ab1041d 100644
--- a/src/cpu/ppc/vm/interpreterRT_ppc.cpp
+++ b/src/cpu/ppc/vm/interpreterRT_ppc.cpp
@@ -24,6 +24,7 @@
*/
#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "memory/allocation.inline.hpp"
diff --git a/src/cpu/ppc/vm/interpreter_ppc.cpp b/src/cpu/ppc/vm/interpreter_ppc.cpp
index 80bfc2d29..a35b1ebb7 100644
--- a/src/cpu/ppc/vm/interpreter_ppc.cpp
+++ b/src/cpu/ppc/vm/interpreter_ppc.cpp
@@ -139,32 +139,16 @@ address AbstractInterpreterGenerator::generate_slow_signature_handler() {
// Signature is in R3_RET. Signature is callee saved.
__ mr(signature, R3_RET);
- // Reload method, it may have moved.
-#ifdef CC_INTERP
- __ ld(R19_method, state_(_method));
-#else
- __ ld(R19_method, 0, target_sp);
- __ ld(R19_method, _ijava_state_neg(method), R19_method);
-#endif
-
// Get the result handler.
__ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_result_handler), R16_thread, R19_method);
- // Reload method, it may have moved.
-#ifdef CC_INTERP
- __ ld(R19_method, state_(_method));
-#else
- __ ld(R19_method, 0, target_sp);
- __ ld(R19_method, _ijava_state_neg(method), R19_method);
-#endif
-
{
Label L;
// test if static
// _access_flags._flags must be at offset 0.
// TODO PPC port: requires change in shared code.
//assert(in_bytes(AccessFlags::flags_offset()) == 0,
- // "MethodOopDesc._access_flags == MethodOopDesc._access_flags._flags");
+ // "MethodDesc._access_flags == MethodDesc._access_flags._flags");
// _access_flags must be a 32 bit value.
assert(sizeof(AccessFlags) == 4, "wrong size");
__ lwa(R11_scratch1/*access_flags*/, method_(access_flags));
diff --git a/src/cpu/ppc/vm/jniFastGetField_ppc.cpp b/src/cpu/ppc/vm/jniFastGetField_ppc.cpp
index 88cc7f148..b16be25c5 100644
--- a/src/cpu/ppc/vm/jniFastGetField_ppc.cpp
+++ b/src/cpu/ppc/vm/jniFastGetField_ppc.cpp
@@ -32,7 +32,7 @@
address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
- // we don't have fast jni accessors.
+ // We don't have fast jni accessors.
return (address) -1;
}
@@ -57,12 +57,12 @@ address JNI_FastGetField::generate_fast_get_int_field() {
}
address JNI_FastGetField::generate_fast_get_long_field() {
- // we don't have fast jni accessors.
+ // We don't have fast jni accessors.
return (address) -1;
}
address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
- // e don't have fast jni accessors.
+ // We don't have fast jni accessors.
return (address) -1;
}
diff --git a/src/cpu/ppc/vm/ppc.ad b/src/cpu/ppc/vm/ppc.ad
index 2989ca516..bb4199462 100644
--- a/src/cpu/ppc/vm/ppc.ad
+++ b/src/cpu/ppc/vm/ppc.ad
@@ -898,7 +898,7 @@ source_hpp %{
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.
- // Returns true if Node n is followed by a MemBar node that
+ // Returns true if Node n is followed by a MemBar node that
// will do an acquire. If so, this node must not do the acquire
// operation.
bool followed_by_acquire(const Node *n);
@@ -908,7 +908,7 @@ source %{
// Optimize load-acquire.
//
-// Check if acquire is unnecessary due to following operation that does
+// Check if acquire is unnecessary due to following operation that does
// acquire anyways.
// Walk the pattern:
//
@@ -919,12 +919,12 @@ source %{
// Proj(ctrl) Proj(mem)
// | |
// MemBarRelease/Volatile
-//
+//
bool followed_by_acquire(const Node *load) {
assert(load->is_Load(), "So far implemented only for loads.");
// Find MemBarAcquire.
- const Node *mba = NULL;
+ const Node *mba = NULL;
for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
const Node *out = load->fast_out(i);
if (out->Opcode() == Op_MemBarAcquire) {
@@ -937,7 +937,7 @@ bool followed_by_acquire(const Node *load) {
// Find following MemBar node.
//
- // The following node must be reachable by control AND memory
+ // The following node must be reachable by control AND memory
// edge to assure no other operations are in between the two nodes.
//
// So first get the Proj node, mem_proj, to use it to iterate forward.
@@ -1135,6 +1135,7 @@ class CallStubImpl {
public:
+ // Emit call stub, compiled java to interpreter.
static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
// Size of call trampoline stub.
@@ -2752,7 +2753,7 @@ encode %{
// inputs for new nodes
m1->add_req(NULL, n_toc);
m2->add_req(NULL, m1);
-
+
// operands for new nodes
m1->_opnds[0] = new (C) iRegPdstOper(); // dst
m1->_opnds[1] = op_src; // src
@@ -2760,29 +2761,29 @@ encode %{
m2->_opnds[0] = new (C) iRegPdstOper(); // dst
m2->_opnds[1] = op_src; // src
m2->_opnds[2] = new (C) iRegLdstOper(); // base
-
+
// Initialize ins_attrib TOC fields.
m1->_const_toc_offset = -1;
m2->_const_toc_offset_hi_node = m1;
-
+
// Register allocation for new nodes.
ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
-
+
nodes->push(m1);
nodes->push(m2);
assert(m2->bottom_type()->isa_ptr(), "must be ptr");
} else {
loadConPNode *m2 = new (C) loadConPNode();
-
+
// inputs for new nodes
m2->add_req(NULL, n_toc);
-
+
// operands for new nodes
m2->_opnds[0] = new (C) iRegPdstOper(); // dst
m2->_opnds[1] = op_src; // src
m2->_opnds[2] = new (C) iRegPdstOper(); // toc
-
+
// Register allocation for new nodes.
ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
@@ -2974,17 +2975,17 @@ encode %{
n_sub_base->_opnds[1] = op_crx;
n_sub_base->_opnds[2] = op_src;
n_sub_base->_bottom_type = _bottom_type;
-
+
n_shift->add_req(n_region, n_sub_base);
n_shift->_opnds[0] = op_dst;
n_shift->_opnds[1] = op_dst;
n_shift->_bottom_type = _bottom_type;
-
+
ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
-
+
nodes->push(n_move);
nodes->push(n_compare);
nodes->push(n_sub_base);
@@ -3061,20 +3062,20 @@ encode %{
} else {
// before Power 7
cond_add_baseNode *n_add_base = new (C) cond_add_baseNode();
-
+
n_add_base->add_req(n_region, n_compare, n_shift);
n_add_base->_opnds[0] = op_dst;
n_add_base->_opnds[1] = op_crx;
n_add_base->_opnds[2] = op_dst;
n_add_base->_bottom_type = _bottom_type;
-
+
assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
ra_->set_oop(n_add_base, true);
-
+
ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
-
+
nodes->push(n_compare);
nodes->push(n_shift);
nodes->push(n_add_base);
@@ -3631,11 +3632,11 @@ encode %{
// Req...
for (uint i = 0; i < req(); ++i) {
// The expanded node does not need toc any more.
- // Add the inline cache constant here instead. This expresses the
+ // Add the inline cache constant here instead. This expresses the
// register of the inline cache must be live at the call.
// Else we would have to adapt JVMState by -1.
if (i == mach_constant_base_node_input()) {
- call->add_req(loadConLNodes_IC._last);
+ call->add_req(loadConLNodes_IC._last);
} else {
call->add_req(in(i));
}
@@ -3663,6 +3664,8 @@ encode %{
%}
// Compound version of call dynamic
+ // Toc is only passed so that it can be used in ins_encode statement.
+ // In the code we have to use $constanttablebase.
enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
@@ -3670,14 +3673,17 @@ encode %{
Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
#if 0
+ int vtable_index = this->_vtable_index;
if (_vtable_index < 0) {
// Must be invalid_vtable_index, not nonvirtual_vtable_index.
assert(_vtable_index == Method::invalid_vtable_index, "correct sentinel value");
Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());
- AddressLiteral meta = __ allocate_metadata_address((Metadata *)Universe::non_oop_word());
+ // Virtual call relocation will point to ic load.
address virtual_call_meta_addr = __ pc();
- __ load_const_from_method_toc(ic_reg, meta, Rtoc);
+ // Load a clear inline cache.
+ AddressLiteral empty_ic((address) Universe::non_oop_word());
+ __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc);
// CALL to fixup routine. Fixup routine uses ScopeDesc info
// to determine who we intended to call.
__ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
@@ -3710,7 +3716,6 @@ encode %{
"Fix constant in ret_addr_offset()");
}
#endif
- guarantee(0, "Fix handling of toc edge: messes up derived/base pairs.");
Unimplemented(); // ret_addr_offset not yet fixed. Depends on compressed oops (load klass!).
%}
@@ -5436,7 +5441,7 @@ instruct loadI_ac(iRegIdst dst, memory mem) %{
ins_pipe(pipe_class_memory);
%}
-// Match loading integer and casting it to unsigned int in
+// Match loading integer and casting it to unsigned int in
// long register.
// LoadI + ConvI2L + AndL 0xffffffff.
instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
@@ -6078,7 +6083,7 @@ instruct loadConNKlass_hi(iRegNdst dst, immNKlass src) %{
ins_pipe(pipe_class_default);
%}
-// This needs a match rule so that build_oop_map knows this is
+// This needs a match rule so that build_oop_map knows this is
// not a narrow oop.
instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
match(Set dst src1);
@@ -6702,7 +6707,7 @@ instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
size(4);
ins_encode %{
// This is a Power7 instruction for which no machine description exists.
- // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
__ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
%}
ins_pipe(pipe_class_default);
@@ -6847,7 +6852,7 @@ instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
size(4);
ins_encode %{
// This is a Power7 instruction for which no machine description exists.
- // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
__ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
%}
ins_pipe(pipe_class_default);
@@ -7064,7 +7069,7 @@ instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc s
n1->_bottom_type = _bottom_type;
decodeNKlass_shiftNode *n2 = new (C) decodeNKlass_shiftNode();
- n2->add_req(n_region, n2);
+ n2->add_req(n_region, n1);
n2->_opnds[0] = op_dst;
n2->_opnds[1] = op_dst;
n2->_bottom_type = _bottom_type;
@@ -7199,7 +7204,7 @@ instruct membar_volatile() %{
// inline_unsafe_load_store).
//
// Add this node again if we found a good solution for inline_unsafe_load_store().
-// Don't forget to look at the implementation of post_store_load_barrier again,
+// Don't forget to look at the implementation of post_store_load_barrier again,
// we did other fixes in that method.
//instruct unnecessary_membar_volatile() %{
// match(MemBarVolatile);
@@ -7237,7 +7242,7 @@ instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
// exists. Anyways, the scheduler should be off on Power7.
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
int cc = $cmp$$cmpcode;
- __ isel($dst$$Register, $crx$$CondRegister,
+ __ isel($dst$$Register, $crx$$CondRegister,
(Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
%}
ins_pipe(pipe_class_default);
@@ -7283,7 +7288,7 @@ instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
// exists. Anyways, the scheduler should be off on Power7.
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
int cc = $cmp$$cmpcode;
- __ isel($dst$$Register, $crx$$CondRegister,
+ __ isel($dst$$Register, $crx$$CondRegister,
(Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
%}
ins_pipe(pipe_class_default);
@@ -7329,7 +7334,7 @@ instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
// exists. Anyways, the scheduler should be off on Power7.
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
int cc = $cmp$$cmpcode;
- __ isel($dst$$Register, $crx$$CondRegister,
+ __ isel($dst$$Register, $crx$$CondRegister,
(Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
%}
ins_pipe(pipe_class_default);
@@ -7376,7 +7381,7 @@ instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{
// exists. Anyways, the scheduler should be off on Power7.
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
int cc = $cmp$$cmpcode;
- __ isel($dst$$Register, $crx$$CondRegister,
+ __ isel($dst$$Register, $crx$$CondRegister,
(Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
%}
ins_pipe(pipe_class_default);
@@ -7522,8 +7527,8 @@ instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
// CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
- __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
- MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+ MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
$res$$Register, true);
%}
ins_pipe(pipe_class_default);
@@ -7929,7 +7934,23 @@ instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
// positive longs and 0xF...F for negative ones.
-instruct signmask64I_regI(iRegIdst dst, iRegIsrc src) %{
+instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
+ // no match-rule, false predicate
+ effect(DEF dst, USE src);
+ predicate(false);
+
+ format %{ "SRADI $dst, $src, #63" %}
+ size(4);
+ ins_encode %{
+ // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
+ __ sradi($dst$$Register, $src$$Register, 0x3f);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
+// positive longs and 0xF...F for negative ones.
+instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{
// no match-rule, false predicate
effect(DEF dst, USE src);
predicate(false);
@@ -8893,7 +8914,7 @@ instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %
size(4);
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
- __ rlwinm($dst$$Register, $src1$$Register, 0,
+ __ rlwinm($dst$$Register, $src1$$Register, 0,
(31-log2_long((jlong) $src2$$constant)) & 0x1f, (31-log2_long((jlong) $src2$$constant)) & 0x1f);
%}
ins_pipe(pipe_class_default);
@@ -9619,14 +9640,14 @@ instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
ins_cost(DEFAULT_COST*4);
expand %{
- iRegIdst src1s;
- iRegIdst src2s;
- iRegIdst diff;
- sxtI_reg(src1s, src1); // ensure proper sign extention
- sxtI_reg(src2s, src2); // ensure proper sign extention
- subI_reg_reg(diff, src1s, src2s);
+ iRegLdst src1s;
+ iRegLdst src2s;
+ iRegLdst diff;
+ convI2L_reg(src1s, src1); // Ensure proper sign extension.
+ convI2L_reg(src2s, src2); // Ensure proper sign extension.
+ subL_reg_reg(diff, src1s, src2s);
// Need to consider >=33 bit result, therefore we need signmaskL.
- signmask64I_regI(dst, diff);
+ signmask64I_regL(dst, diff);
%}
%}
@@ -10863,7 +10884,7 @@ instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P supe
format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
- __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
+ __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
$tmp_klass$$Register, NULL, $result$$Register);
%}
ins_pipe(pipe_class_default);
@@ -11178,18 +11199,18 @@ instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
ins_cost(DEFAULT_COST*6);
expand %{
- iRegIdst src1s;
- iRegIdst src2s;
- iRegIdst diff;
- iRegIdst sm;
- iRegIdst doz; // difference or zero
- sxtI_reg(src1s, src1); // Ensure proper sign extention.
- sxtI_reg(src2s, src2); // Ensure proper sign extention.
- subI_reg_reg(diff, src2s, src1s);
+ iRegLdst src1s;
+ iRegLdst src2s;
+ iRegLdst diff;
+ iRegLdst sm;
+ iRegLdst doz; // difference or zero
+ convI2L_reg(src1s, src1); // Ensure proper sign extension.
+ convI2L_reg(src2s, src2); // Ensure proper sign extension.
+ subL_reg_reg(diff, src2s, src1s);
// Need to consider >=33 bit result, therefore we need signmaskL.
- signmask64I_regI(sm, diff);
- andI_reg_reg(doz, diff, sm); // <=0
- addI_reg_reg(dst, doz, src1s);
+ signmask64L_regL(sm, diff);
+ andL_reg_reg(doz, diff, sm); // <=0
+ addI_regL_regL(dst, doz, src1s);
%}
%}
@@ -11198,19 +11219,18 @@ instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
ins_cost(DEFAULT_COST*6);
expand %{
- immI_minus1 m1 %{ -1 %}
- iRegIdst src1s;
- iRegIdst src2s;
- iRegIdst diff;
- iRegIdst sm;
- iRegIdst doz; // difference or zero
- sxtI_reg(src1s, src1); // Ensure proper sign extention.
- sxtI_reg(src2s, src2); // Ensure proper sign extention.
- subI_reg_reg(diff, src2s, src1s);
+ iRegLdst src1s;
+ iRegLdst src2s;
+ iRegLdst diff;
+ iRegLdst sm;
+ iRegLdst doz; // difference or zero
+ convI2L_reg(src1s, src1); // Ensure proper sign extension.
+ convI2L_reg(src2s, src2); // Ensure proper sign extension.
+ subL_reg_reg(diff, src2s, src1s);
// Need to consider >=33 bit result, therefore we need signmaskL.
- signmask64I_regI(sm, diff);
- andcI_reg_reg(doz, sm, m1, diff); // >=0
- addI_reg_reg(dst, doz, src1s);
+ signmask64L_regL(sm, diff);
+ andcL_reg_reg(doz, diff, sm); // >=0
+ addI_regL_regL(dst, doz, src1s);
%}
%}
diff --git a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp
index f22fbae29..635314d25 100644
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp
@@ -81,24 +81,18 @@ address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(con
#if 0
// Call special ClassCastException constructor taking object to cast
// and target class as arguments.
-address TemplateInterpreterGenerator::generate_ClassCastException_verbose_handler(const char* name) {
+address TemplateInterpreterGenerator::generate_ClassCastException_verbose_handler() {
address entry = __ pc();
- // Target class oop is in register R6_ARG4 by convention!
-
// Expression stack must be empty before entering the VM if an
// exception happened.
__ empty_expression_stack();
- // Setup parameters.
+
// Thread will be loaded to R3_ARG1.
- __ load_const_optimized(R4_ARG2, (address) name);
- __ mr(R5_ARG3, R17_tos);
- // R6_ARG4 contains specified class.
- __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException_verbose));
-#ifdef ASSERT
+ // Target class oop is in register R5_ARG3 by convention!
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException_verbose, R17_tos, R5_ARG3));
// Above call must not return here since exception pending.
- __ should_not_reach_here();
-#endif
+ DEBUG_ONLY(__ should_not_reach_here();)
return entry;
}
#endif
@@ -1535,14 +1529,32 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
__ stw(R0, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
// Get out of the current method and re-execute the call that called us.
- __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ return_pc, R11_scratch1, R12_scratch2);
+ __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ noreg, R11_scratch1, R12_scratch2);
__ restore_interpreter_state(R11_scratch1);
__ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1);
__ resize_frame_absolute(R12_scratch2, R11_scratch1, R0);
- __ mtlr(return_pc);
if (ProfileInterpreter) {
__ set_method_data_pointer_for_bcp();
}
+#if INCLUDE_JVMTI
+ Label L_done;
+
+ __ lbz(R11_scratch1, 0, R14_bcp);
+ __ cmpwi(CCR0, R11_scratch1, Bytecodes::_invokestatic);
+ __ bne(CCR0, L_done);
+
+ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call.
+ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
+ __ ld(R4_ARG2, 0, R18_locals);
+ __ call_VM(R11_scratch1, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),
+ R4_ARG2, R19_method, R14_bcp);
+
+ __ cmpdi(CCR0, R11_scratch1, 0);
+ __ beq(CCR0, L_done);
+
+ __ std(R11_scratch1, wordSize, R15_esp);
+ __ bind(L_done);
+#endif // INCLUDE_JVMTI
__ dispatch_next(vtos);
}
// end of JVMTI PopFrame support
diff --git a/src/cpu/ppc/vm/templateTable_ppc_64.cpp b/src/cpu/ppc/vm/templateTable_ppc_64.cpp
index e7846bc28..87cb82851 100644
--- a/src/cpu/ppc/vm/templateTable_ppc_64.cpp
+++ b/src/cpu/ppc/vm/templateTable_ppc_64.cpp
@@ -64,7 +64,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
assert_different_registers(Rtmp1, Rtmp2, Rtmp3, Rval, Rbase);
switch (barrier) {
-#ifndef SERIALGC
+#if INCLUDE_ALL_GCS
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
@@ -104,7 +104,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
__ bind(Ldone);
}
break;
-#endif // SERIALGC
+#endif // INCLUDE_ALL_GCS
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
@@ -259,17 +259,17 @@ void TemplateTable::fconst(int value) {
switch (value) {
default: ShouldNotReachHere();
case 0: {
- int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0);
+ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0, true);
__ lfs(F15_ftos, simm16_offset, R11_scratch1);
break;
}
case 1: {
- int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0);
+ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0, true);
__ lfs(F15_ftos, simm16_offset, R11_scratch1);
break;
}
case 2: {
- int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&two, R0);
+ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&two, R0, true);
__ lfs(F15_ftos, simm16_offset, R11_scratch1);
break;
}
@@ -282,12 +282,12 @@ void TemplateTable::dconst(int value) {
static double one = 1.0;
switch (value) {
case 0: {
- int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0);
+ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0, true);
__ lfd(F15_ftos, simm16_offset, R11_scratch1);
break;
}
case 1: {
- int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0);
+ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0, true);
__ lfd(F15_ftos, simm16_offset, R11_scratch1);
break;
}
@@ -3728,9 +3728,9 @@ void TemplateTable::checkcast() {
transition(atos, atos);
Label Ldone, Lis_null, Lquicked, Lresolved;
- Register Roffset = R5_ARG3,
+ Register Roffset = R6_ARG4,
RobjKlass = R4_ARG2,
- RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect this register.
+ RspecifiedKlass = R5_ARG3, // Generate_ClassCastException_verbose_handler will read value from this register.
Rcpool = R11_scratch1,
Rtags = R12_scratch2;
diff --git a/src/cpu/sparc/vm/assembler_sparc.hpp b/src/cpu/sparc/vm/assembler_sparc.hpp
index ffbc6f27c..db7ff9eca 100644
--- a/src/cpu/sparc/vm/assembler_sparc.hpp
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -123,8 +123,13 @@ class Assembler : public AbstractAssembler {
fpop2_op3 = 0x35,
impdep1_op3 = 0x36,
aes3_op3 = 0x36,
+ alignaddr_op3 = 0x36,
+ faligndata_op3 = 0x36,
flog3_op3 = 0x36,
+ edge_op3 = 0x36,
+ fsrc_op3 = 0x36,
impdep2_op3 = 0x37,
+ stpartialf_op3 = 0x37,
jmpl_op3 = 0x38,
rett_op3 = 0x39,
trap_op3 = 0x3a,
@@ -175,17 +180,23 @@ class Assembler : public AbstractAssembler {
enum opfs {
// selected opfs
+ edge8n_opf = 0x01,
+
fmovs_opf = 0x01,
fmovd_opf = 0x02,
fnegs_opf = 0x05,
fnegd_opf = 0x06,
+ alignaddr_opf = 0x18,
+
fadds_opf = 0x41,
faddd_opf = 0x42,
fsubs_opf = 0x45,
fsubd_opf = 0x46,
+ faligndata_opf = 0x48,
+
fmuls_opf = 0x49,
fmuld_opf = 0x4a,
fdivs_opf = 0x4d,
@@ -348,6 +359,8 @@ class Assembler : public AbstractAssembler {
ASI_PRIMARY = 0x80,
ASI_PRIMARY_NOFAULT = 0x82,
ASI_PRIMARY_LITTLE = 0x88,
+ // 8x8-bit partial store
+ ASI_PST8_PRIMARY = 0xC0,
// Block initializing store
ASI_ST_BLKINIT_PRIMARY = 0xE2,
// Most-Recently-Used (MRU) BIS variant
@@ -585,6 +598,9 @@ class Assembler : public AbstractAssembler {
// instruction only in VIS1
static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
+ // instruction only in VIS2
+ static void vis2_only() { assert( VM_Version::has_vis2(), "This instruction only works on SPARC with VIS2"); }
+
// instruction only in VIS3
static void vis3_only() { assert( VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3"); }
@@ -1164,6 +1180,20 @@ public:
inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
+ // VIS1 instructions
+
+ void alignaddr( Register s1, Register s2, Register d ) { vis1_only(); emit_int32( op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2)); }
+
+ void faligndata( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(faligndata_op3) | fs1(s1, FloatRegisterImpl::D) | opf(faligndata_opf) | fs2(s2, FloatRegisterImpl::D)); }
+
+ void fsrc2( FloatRegisterImpl::Width w, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fsrc_op3) | opf(0x7A - w) | fs2(s2, w)); }
+
+ void stpartialf( Register s1, Register s2, FloatRegister d, int ia = -1 ) { vis1_only(); emit_int32( op(ldst_op) | fd(d, FloatRegisterImpl::D) | op3(stpartialf_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); }
+
+ // VIS2 instructions
+
+ void edge8n( Register s1, Register s2, Register d ) { vis2_only(); emit_int32( op(arith_op) | rd(d) | op3(edge_op3) | rs1(s1) | opf(edge8n_opf) | rs2(s2)); }
+
// VIS3 instructions
void movstosw( FloatRegister s, Register d ) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S)); }
diff --git a/src/cpu/sparc/vm/stubGenerator_sparc.cpp b/src/cpu/sparc/vm/stubGenerator_sparc.cpp
index 25023404d..aa81f9e4b 100644
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -3305,9 +3305,12 @@ class StubGenerator: public StubCodeGenerator {
}
address generate_aescrypt_encryptBlock() {
+ // required since we read expanded key 'int' array starting first element without alignment considerations
+ assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
+ "the following code assumes that first element of an int array is aligned to 8 bytes");
__ align(CodeEntryAlignment);
- StubCodeMark mark(this, "StubRoutines", "aesencryptBlock");
- Label L_doLast128bit, L_storeOutput;
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+ Label L_load_misaligned_input, L_load_expanded_key, L_doLast128bit, L_storeOutput, L_store_misaligned_output;
address start = __ pc();
Register from = O0; // source byte array
Register to = O1; // destination byte array
@@ -3317,15 +3320,33 @@ class StubGenerator: public StubCodeGenerator {
// read expanded key length
__ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
- // load input into F54-F56; F30-F31 used as temp
- __ ldf(FloatRegisterImpl::S, from, 0, F30);
- __ ldf(FloatRegisterImpl::S, from, 4, F31);
- __ fmov(FloatRegisterImpl::D, F30, F54);
- __ ldf(FloatRegisterImpl::S, from, 8, F30);
- __ ldf(FloatRegisterImpl::S, from, 12, F31);
- __ fmov(FloatRegisterImpl::D, F30, F56);
-
- // load expanded key
+ // Method to address arbitrary alignment for load instructions:
+ // Check last 3 bits of 'from' address to see if it is aligned to 8-byte boundary
+ // If zero/aligned then continue with double FP load instructions
+ // If not zero/mis-aligned then alignaddr will set GSR.align with number of bytes to skip during faligndata
+ // alignaddr will also convert arbitrary aligned 'from' address to nearest 8-byte aligned address
+ // load 3 * 8-byte components (to read 16 bytes input) in 3 different FP regs starting at this aligned address
+ // faligndata will then extract (based on GSR.align value) the appropriate 8 bytes from the 2 source regs
+
+ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input);
+ __ delayed()->alignaddr(from, G0, from);
+
+ // aligned case: load input into F54-F56
+ __ ldf(FloatRegisterImpl::D, from, 0, F54);
+ __ ldf(FloatRegisterImpl::D, from, 8, F56);
+ __ ba_short(L_load_expanded_key);
+
+ __ BIND(L_load_misaligned_input);
+ __ ldf(FloatRegisterImpl::D, from, 0, F54);
+ __ ldf(FloatRegisterImpl::D, from, 8, F56);
+ __ ldf(FloatRegisterImpl::D, from, 16, F58);
+ __ faligndata(F54, F56, F54);
+ __ faligndata(F56, F58, F56);
+
+ __ BIND(L_load_expanded_key);
+ // Since we load expanded key buffers starting first element, 8-byte alignment is guaranteed
for ( int i = 0; i <= 38; i += 2 ) {
__ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i));
}
@@ -3365,8 +3386,7 @@ class StubGenerator: public StubCodeGenerator {
__ ldf(FloatRegisterImpl::D, key, 232, F50);
__ aes_eround01(F52, F54, F56, F58); //round 13
__ aes_eround23(F46, F54, F56, F60);
- __ br(Assembler::always, false, Assembler::pt, L_storeOutput);
- __ delayed()->nop();
+ __ ba_short(L_storeOutput);
__ BIND(L_doLast128bit);
__ ldf(FloatRegisterImpl::D, key, 160, F48);
@@ -3377,23 +3397,62 @@ class StubGenerator: public StubCodeGenerator {
__ aes_eround01_l(F48, F58, F60, F54); //last round
__ aes_eround23_l(F50, F58, F60, F56);
- // store output into the destination array, F0-F1 used as temp
- __ fmov(FloatRegisterImpl::D, F54, F0);
- __ stf(FloatRegisterImpl::S, F0, to, 0);
- __ stf(FloatRegisterImpl::S, F1, to, 4);
- __ fmov(FloatRegisterImpl::D, F56, F0);
- __ stf(FloatRegisterImpl::S, F0, to, 8);
+ // Method to address arbitrary alignment for store instructions:
+ // Check last 3 bits of 'dest' address to see if it is aligned to 8-byte boundary
+ // If zero/aligned then continue with double FP store instructions
+ // If not zero/mis-aligned then edge8n will generate edge mask in result reg (O3 in below case)
+ // Example: If dest address is 0x07 and nearest 8-byte aligned address is 0x00 then edge mask will be 00000001
+ // Compute (8-n) where n is # of bytes skipped by partial store(stpartialf) inst from edge mask, n=7 in this case
+ // We get the value of n from the andcc that checks 'dest' alignment. n is available in O5 in below case.
+ // Set GSR.align to (8-n) using alignaddr
+ // Circular byte shift store values by n places so that the original bytes are at correct position for stpartialf
+ // Set the arbitrarily aligned 'dest' address to nearest 8-byte aligned address
+ // Store (partial) the original first (8-n) bytes starting at the original 'dest' address
+ // Negate the edge mask so that the subsequent stpartialf can store the original (8-n-1)th through 8th bytes at appropriate address
+ // We need to execute this process for both the 8-byte result values
+
+ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(to, 7, O5);
+ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output);
+ __ delayed()->edge8n(to, G0, O3);
+
+ // aligned case: store output into the destination array
+ __ stf(FloatRegisterImpl::D, F54, to, 0);
__ retl();
- __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
+ __ delayed()->stf(FloatRegisterImpl::D, F56, to, 8);
+
+ __ BIND(L_store_misaligned_output);
+ __ add(to, 8, O4);
+ __ mov(8, O2);
+ __ sub(O2, O5, O2);
+ __ alignaddr(O2, G0, O2);
+ __ faligndata(F54, F54, F54);
+ __ faligndata(F56, F56, F56);
+ __ and3(to, -8, to);
+ __ and3(O4, -8, O4);
+ __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY);
+ __ add(to, 8, to);
+ __ add(O4, 8, O4);
+ __ orn(G0, O3, O3);
+ __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY);
+ __ retl();
+ __ delayed()->stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY);
return start;
}
address generate_aescrypt_decryptBlock() {
+ assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
+ "the following code assumes that first element of an int array is aligned to 8 bytes");
+ // required since we read original key 'byte' array as well in the decryption stubs
+ assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0,
+ "the following code assumes that first element of a byte array is aligned to 8 bytes");
__ align(CodeEntryAlignment);
- StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock");
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
address start = __ pc();
- Label L_expand192bit, L_expand256bit, L_common_transform;
+ Label L_load_misaligned_input, L_load_original_key, L_expand192bit, L_expand256bit, L_reload_misaligned_input;
+ Label L_256bit_transform, L_common_transform, L_store_misaligned_output;
Register from = O0; // source byte array
Register to = O1; // destination byte array
Register key = O2; // expanded key array
@@ -3403,15 +3462,29 @@ class StubGenerator: public StubCodeGenerator {
// read expanded key array length
__ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
- // load input into F52-F54; F30,F31 used as temp
- __ ldf(FloatRegisterImpl::S, from, 0, F30);
- __ ldf(FloatRegisterImpl::S, from, 4, F31);
- __ fmov(FloatRegisterImpl::D, F30, F52);
- __ ldf(FloatRegisterImpl::S, from, 8, F30);
- __ ldf(FloatRegisterImpl::S, from, 12, F31);
- __ fmov(FloatRegisterImpl::D, F30, F54);
+ // save 'from' since we may need to recheck alignment in case of 256-bit decryption
+ __ mov(from, G1);
+
+ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input);
+ __ delayed()->alignaddr(from, G0, from);
+
+ // aligned case: load input into F52-F54
+ __ ldf(FloatRegisterImpl::D, from, 0, F52);
+ __ ldf(FloatRegisterImpl::D, from, 8, F54);
+ __ ba_short(L_load_original_key);
+ __ BIND(L_load_misaligned_input);
+ __ ldf(FloatRegisterImpl::D, from, 0, F52);
+ __ ldf(FloatRegisterImpl::D, from, 8, F54);
+ __ ldf(FloatRegisterImpl::D, from, 16, F56);
+ __ faligndata(F52, F54, F52);
+ __ faligndata(F54, F56, F54);
+
+ __ BIND(L_load_original_key);
// load original key from SunJCE expanded decryption key
+ // Since we load original key buffer starting first element, 8-byte alignment is guaranteed
for ( int i = 0; i <= 3; i++ ) {
__ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
}
@@ -3432,8 +3505,7 @@ class StubGenerator: public StubCodeGenerator {
// perform 128-bit key specific inverse cipher transformation
__ fxor(FloatRegisterImpl::D, F42, F54, F54);
__ fxor(FloatRegisterImpl::D, F40, F52, F52);
- __ br(Assembler::always, false, Assembler::pt, L_common_transform);
- __ delayed()->nop();
+ __ ba_short(L_common_transform);
__ BIND(L_expand192bit);
@@ -3457,8 +3529,7 @@ class StubGenerator: public StubCodeGenerator {
__ aes_dround01(F44, F52, F54, F56);
__ aes_dround23(F42, F56, F58, F54);
__ aes_dround01(F40, F56, F58, F52);
- __ br(Assembler::always, false, Assembler::pt, L_common_transform);
- __ delayed()->nop();
+ __ ba_short(L_common_transform);
__ BIND(L_expand256bit);
@@ -3478,14 +3549,31 @@ class StubGenerator: public StubCodeGenerator {
__ aes_kexpand2(F50, F56, F58);
for ( int i = 0; i <= 6; i += 2 ) {
- __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i));
+ __ fsrc2(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i));
}
- // load input into F52-F54
+ // reload original 'from' address
+ __ mov(G1, from);
+
+ // re-check 8-byte alignment
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_reload_misaligned_input);
+ __ delayed()->alignaddr(from, G0, from);
+
+ // aligned case: load input into F52-F54
+ __ ldf(FloatRegisterImpl::D, from, 0, F52);
+ __ ldf(FloatRegisterImpl::D, from, 8, F54);
+ __ ba_short(L_256bit_transform);
+
+ __ BIND(L_reload_misaligned_input);
__ ldf(FloatRegisterImpl::D, from, 0, F52);
__ ldf(FloatRegisterImpl::D, from, 8, F54);
+ __ ldf(FloatRegisterImpl::D, from, 16, F56);
+ __ faligndata(F52, F54, F52);
+ __ faligndata(F54, F56, F54);
// perform 256-bit key specific inverse cipher transformation
+ __ BIND(L_256bit_transform);
__ fxor(FloatRegisterImpl::D, F0, F54, F54);
__ fxor(FloatRegisterImpl::D, F2, F52, F52);
__ aes_dround23(F4, F52, F54, F58);
@@ -3515,43 +3603,71 @@ class StubGenerator: public StubCodeGenerator {
}
}
- // store output to destination array, F0-F1 used as temp
- __ fmov(FloatRegisterImpl::D, F52, F0);
- __ stf(FloatRegisterImpl::S, F0, to, 0);
- __ stf(FloatRegisterImpl::S, F1, to, 4);
- __ fmov(FloatRegisterImpl::D, F54, F0);
- __ stf(FloatRegisterImpl::S, F0, to, 8);
+ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(to, 7, O5);
+ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output);
+ __ delayed()->edge8n(to, G0, O3);
+
+ // aligned case: store output into the destination array
+ __ stf(FloatRegisterImpl::D, F52, to, 0);
+ __ retl();
+ __ delayed()->stf(FloatRegisterImpl::D, F54, to, 8);
+
+ __ BIND(L_store_misaligned_output);
+ __ add(to, 8, O4);
+ __ mov(8, O2);
+ __ sub(O2, O5, O2);
+ __ alignaddr(O2, G0, O2);
+ __ faligndata(F52, F52, F52);
+ __ faligndata(F54, F54, F54);
+ __ and3(to, -8, to);
+ __ and3(O4, -8, O4);
+ __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY);
+ __ add(to, 8, to);
+ __ add(O4, 8, O4);
+ __ orn(G0, O3, O3);
+ __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY);
__ retl();
- __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
+ __ delayed()->stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY);
return start;
}
address generate_cipherBlockChaining_encryptAESCrypt() {
+ assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
+ "the following code assumes that first element of an int array is aligned to 8 bytes");
+ assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0,
+ "the following code assumes that first element of a byte array is aligned to 8 bytes");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
- Label L_cbcenc128, L_cbcenc192, L_cbcenc256;
+ Label L_cbcenc128, L_load_misaligned_input_128bit, L_128bit_transform, L_store_misaligned_output_128bit;
+ Label L_check_loop_end_128bit, L_cbcenc192, L_load_misaligned_input_192bit, L_192bit_transform;
+ Label L_store_misaligned_output_192bit, L_check_loop_end_192bit, L_cbcenc256, L_load_misaligned_input_256bit;
+ Label L_256bit_transform, L_store_misaligned_output_256bit, L_check_loop_end_256bit;
address start = __ pc();
- Register from = O0; // source byte array
- Register to = O1; // destination byte array
- Register key = O2; // expanded key array
- Register rvec = O3; // init vector
- const Register len_reg = O4; // cipher length
- const Register keylen = O5; // reg for storing expanded key array length
+ Register from = I0; // source byte array
+ Register to = I1; // destination byte array
+ Register key = I2; // expanded key array
+ Register rvec = I3; // init vector
+ const Register len_reg = I4; // cipher length
+ const Register keylen = I5; // reg for storing expanded key array length
- // save cipher len to return in the end
- __ mov(len_reg, L1);
+ // save cipher len before save_frame, to return in the end
+ __ mov(O4, L0);
+ __ save_frame(0);
// read expanded key length
__ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
- // load init vector
+ // load initial vector, 8-byte alignment is guaranteed
__ ldf(FloatRegisterImpl::D, rvec, 0, F60);
__ ldf(FloatRegisterImpl::D, rvec, 8, F62);
+ // load key, 8-byte alignment is guaranteed
__ ldx(key,0,G1);
- __ ldx(key,8,G2);
+ __ ldx(key,8,G5);
- // start loading expanded key
+ // start loading expanded key, 8-byte alignment is guaranteed
for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) {
__ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
}
@@ -3571,15 +3687,35 @@ class StubGenerator: public StubCodeGenerator {
}
// 256-bit original key size
- __ br(Assembler::always, false, Assembler::pt, L_cbcenc256);
- __ delayed()->nop();
+ __ ba_short(L_cbcenc256);
__ align(OptoLoopAlignment);
__ BIND(L_cbcenc128);
+ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_128bit);
+ __ delayed()->mov(from, L1); // save original 'from' address before alignaddr
+
+ // aligned case: load input into G3 and G4
__ ldx(from,0,G3);
__ ldx(from,8,G4);
+ __ ba_short(L_128bit_transform);
+
+ __ BIND(L_load_misaligned_input_128bit);
+ // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption
+ __ alignaddr(from, G0, from);
+ __ ldf(FloatRegisterImpl::D, from, 0, F48);
+ __ ldf(FloatRegisterImpl::D, from, 8, F50);
+ __ ldf(FloatRegisterImpl::D, from, 16, F52);
+ __ faligndata(F48, F50, F48);
+ __ faligndata(F50, F52, F50);
+ __ movdtox(F48, G3);
+ __ movdtox(F50, G4);
+ __ mov(L1, from);
+
+ __ BIND(L_128bit_transform);
__ xor3(G1,G3,G3);
- __ xor3(G2,G4,G4);
+ __ xor3(G5,G4,G4);
__ movxtod(G3,F56);
__ movxtod(G4,F58);
__ fxor(FloatRegisterImpl::D, F60, F56, F60);
@@ -3598,24 +3734,81 @@ class StubGenerator: public StubCodeGenerator {
}
}
+ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(to, 7, L1);
+ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_128bit);
+ __ delayed()->edge8n(to, G0, L2);
+
+ // aligned case: store output into the destination array
__ stf(FloatRegisterImpl::D, F60, to, 0);
__ stf(FloatRegisterImpl::D, F62, to, 8);
+ __ ba_short(L_check_loop_end_128bit);
+
+ __ BIND(L_store_misaligned_output_128bit);
+ __ add(to, 8, L3);
+ __ mov(8, L4);
+ __ sub(L4, L1, L4);
+ __ alignaddr(L4, G0, L4);
+ // save cipher text before circular right shift
+ // as it needs to be stored as iv for next block (see code before next retl)
+ __ movdtox(F60, L6);
+ __ movdtox(F62, L7);
+ __ faligndata(F60, F60, F60);
+ __ faligndata(F62, F62, F62);
+ __ mov(to, L5);
+ __ and3(to, -8, to);
+ __ and3(L3, -8, L3);
+ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
+ __ add(to, 8, to);
+ __ add(L3, 8, L3);
+ __ orn(G0, L2, L2);
+ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
+ __ mov(L5, to);
+ __ movxtod(L6, F60);
+ __ movxtod(L7, F62);
+
+ __ BIND(L_check_loop_end_128bit);
__ add(from, 16, from);
__ add(to, 16, to);
__ subcc(len_reg, 16, len_reg);
__ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128);
__ delayed()->nop();
+ // re-init initial vector for next block, 8-byte alignment is guaranteed
__ stf(FloatRegisterImpl::D, F60, rvec, 0);
__ stf(FloatRegisterImpl::D, F62, rvec, 8);
+ __ restore();
__ retl();
- __ delayed()->mov(L1, O0);
+ __ delayed()->mov(L0, O0);
__ align(OptoLoopAlignment);
__ BIND(L_cbcenc192);
+ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_192bit);
+ __ delayed()->mov(from, L1); // save original 'from' address before alignaddr
+
+ // aligned case: load input into G3 and G4
__ ldx(from,0,G3);
__ ldx(from,8,G4);
+ __ ba_short(L_192bit_transform);
+
+ __ BIND(L_load_misaligned_input_192bit);
+ // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption
+ __ alignaddr(from, G0, from);
+ __ ldf(FloatRegisterImpl::D, from, 0, F48);
+ __ ldf(FloatRegisterImpl::D, from, 8, F50);
+ __ ldf(FloatRegisterImpl::D, from, 16, F52);
+ __ faligndata(F48, F50, F48);
+ __ faligndata(F50, F52, F50);
+ __ movdtox(F48, G3);
+ __ movdtox(F50, G4);
+ __ mov(L1, from);
+
+ __ BIND(L_192bit_transform);
__ xor3(G1,G3,G3);
- __ xor3(G2,G4,G4);
+ __ xor3(G5,G4,G4);
__ movxtod(G3,F56);
__ movxtod(G4,F58);
__ fxor(FloatRegisterImpl::D, F60, F56, F60);
@@ -3634,24 +3827,81 @@ class StubGenerator: public StubCodeGenerator {
}
}
+ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(to, 7, L1);
+ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_192bit);
+ __ delayed()->edge8n(to, G0, L2);
+
+ // aligned case: store output into the destination array
__ stf(FloatRegisterImpl::D, F60, to, 0);
__ stf(FloatRegisterImpl::D, F62, to, 8);
+ __ ba_short(L_check_loop_end_192bit);
+
+ __ BIND(L_store_misaligned_output_192bit);
+ __ add(to, 8, L3);
+ __ mov(8, L4);
+ __ sub(L4, L1, L4);
+ __ alignaddr(L4, G0, L4);
+ __ movdtox(F60, L6);
+ __ movdtox(F62, L7);
+ __ faligndata(F60, F60, F60);
+ __ faligndata(F62, F62, F62);
+ __ mov(to, L5);
+ __ and3(to, -8, to);
+ __ and3(L3, -8, L3);
+ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
+ __ add(to, 8, to);
+ __ add(L3, 8, L3);
+ __ orn(G0, L2, L2);
+ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
+ __ mov(L5, to);
+ __ movxtod(L6, F60);
+ __ movxtod(L7, F62);
+
+ __ BIND(L_check_loop_end_192bit);
__ add(from, 16, from);
__ subcc(len_reg, 16, len_reg);
__ add(to, 16, to);
__ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192);
__ delayed()->nop();
+ // re-init initial vector for next block, 8-byte alignment is guaranteed
__ stf(FloatRegisterImpl::D, F60, rvec, 0);
__ stf(FloatRegisterImpl::D, F62, rvec, 8);
+ __ restore();
__ retl();
- __ delayed()->mov(L1, O0);
+ __ delayed()->mov(L0, O0);
__ align(OptoLoopAlignment);
__ BIND(L_cbcenc256);
+ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_256bit);
+ __ delayed()->mov(from, L1); // save original 'from' address before alignaddr
+
+ // aligned case: load input into G3 and G4
__ ldx(from,0,G3);
__ ldx(from,8,G4);
+ __ ba_short(L_256bit_transform);
+
+ __ BIND(L_load_misaligned_input_256bit);
+ // cannot clobber F48, F50 and F52. F56, F58 can be used though
+ __ alignaddr(from, G0, from);
+ __ movdtox(F60, L2); // save F60 before overwriting
+ __ ldf(FloatRegisterImpl::D, from, 0, F56);
+ __ ldf(FloatRegisterImpl::D, from, 8, F58);
+ __ ldf(FloatRegisterImpl::D, from, 16, F60);
+ __ faligndata(F56, F58, F56);
+ __ faligndata(F58, F60, F58);
+ __ movdtox(F56, G3);
+ __ movdtox(F58, G4);
+ __ mov(L1, from);
+ __ movxtod(L2, F60);
+
+ __ BIND(L_256bit_transform);
__ xor3(G1,G3,G3);
- __ xor3(G2,G4,G4);
+ __ xor3(G5,G4,G4);
__ movxtod(G3,F56);
__ movxtod(G4,F58);
__ fxor(FloatRegisterImpl::D, F60, F56, F60);
@@ -3670,26 +3920,69 @@ class StubGenerator: public StubCodeGenerator {
}
}
+ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(to, 7, L1);
+ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_256bit);
+ __ delayed()->edge8n(to, G0, L2);
+
+ // aligned case: store output into the destination array
__ stf(FloatRegisterImpl::D, F60, to, 0);
__ stf(FloatRegisterImpl::D, F62, to, 8);
+ __ ba_short(L_check_loop_end_256bit);
+
+ __ BIND(L_store_misaligned_output_256bit);
+ __ add(to, 8, L3);
+ __ mov(8, L4);
+ __ sub(L4, L1, L4);
+ __ alignaddr(L4, G0, L4);
+ __ movdtox(F60, L6);
+ __ movdtox(F62, L7);
+ __ faligndata(F60, F60, F60);
+ __ faligndata(F62, F62, F62);
+ __ mov(to, L5);
+ __ and3(to, -8, to);
+ __ and3(L3, -8, L3);
+ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
+ __ add(to, 8, to);
+ __ add(L3, 8, L3);
+ __ orn(G0, L2, L2);
+ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
+ __ mov(L5, to);
+ __ movxtod(L6, F60);
+ __ movxtod(L7, F62);
+
+ __ BIND(L_check_loop_end_256bit);
__ add(from, 16, from);
__ subcc(len_reg, 16, len_reg);
__ add(to, 16, to);
__ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256);
__ delayed()->nop();
+ // re-init initial vector for next block, 8-byte alignment is guaranteed
__ stf(FloatRegisterImpl::D, F60, rvec, 0);
__ stf(FloatRegisterImpl::D, F62, rvec, 8);
+ __ restore();
__ retl();
- __ delayed()->mov(L1, O0);
+ __ delayed()->mov(L0, O0);
return start;
}
address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
+ assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
+ "the following code assumes that first element of an int array is aligned to 8 bytes");
+ assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0,
+ "the following code assumes that first element of a byte array is aligned to 8 bytes");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start;
Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256;
+ Label L_load_misaligned_input_first_block, L_transform_first_block, L_load_misaligned_next2_blocks128, L_transform_next2_blocks128;
+ Label L_load_misaligned_next2_blocks192, L_transform_next2_blocks192, L_load_misaligned_next2_blocks256, L_transform_next2_blocks256;
+ Label L_store_misaligned_output_first_block, L_check_decrypt_end, L_store_misaligned_output_next2_blocks128;
+ Label L_check_decrypt_loop_end128, L_store_misaligned_output_next2_blocks192, L_check_decrypt_loop_end192;
+ Label L_store_misaligned_output_next2_blocks256, L_check_decrypt_loop_end256;
address start = __ pc();
Register from = I0; // source byte array
Register to = I1; // destination byte array
@@ -3704,11 +3997,12 @@ class StubGenerator: public StubCodeGenerator {
__ save_frame(0); //args are read from I* registers since we save the frame in the beginning
// load original key from SunJCE expanded decryption key
+ // Since we load original key buffer starting first element, 8-byte alignment is guaranteed
for ( int i = 0; i <= 3; i++ ) {
__ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
}
- // load initial vector
+ // load initial vector, 8-byte alignment is guaranteed
__ ldx(rvec,0,L0);
__ ldx(rvec,8,L1);
@@ -3733,11 +4027,10 @@ class StubGenerator: public StubCodeGenerator {
__ movdtox(F42,L3);
__ and3(len_reg, 16, L4);
- __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128);
- __ delayed()->nop();
+ __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks128);
+ __ nop();
- __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
- __ delayed()->nop();
+ __ ba_short(L_dec_first_block_start);
__ BIND(L_expand192bit);
// load rest of the 192-bit key
@@ -3758,11 +4051,10 @@ class StubGenerator: public StubCodeGenerator {
__ movdtox(F50,L3);
__ and3(len_reg, 16, L4);
- __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192);
- __ delayed()->nop();
+ __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks192);
+ __ nop();
- __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
- __ delayed()->nop();
+ __ ba_short(L_dec_first_block_start);
__ BIND(L_expand256bit);
// load rest of the 256-bit key
@@ -3785,12 +4077,32 @@ class StubGenerator: public StubCodeGenerator {
__ movdtox(F58,L3);
__ and3(len_reg, 16, L4);
- __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256);
- __ delayed()->nop();
+ __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks256);
__ BIND(L_dec_first_block_start);
+ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_first_block);
+ __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
+
+ // aligned case: load input into L4 and L5
__ ldx(from,0,L4);
__ ldx(from,8,L5);
+ __ ba_short(L_transform_first_block);
+
+ __ BIND(L_load_misaligned_input_first_block);
+ __ alignaddr(from, G0, from);
+ // F58, F60, F62 can be clobbered
+ __ ldf(FloatRegisterImpl::D, from, 0, F58);
+ __ ldf(FloatRegisterImpl::D, from, 8, F60);
+ __ ldf(FloatRegisterImpl::D, from, 16, F62);
+ __ faligndata(F58, F60, F58);
+ __ faligndata(F60, F62, F60);
+ __ movdtox(F58, L4);
+ __ movdtox(F60, L5);
+ __ mov(G1, from);
+
+ __ BIND(L_transform_first_block);
__ xor3(L2,L4,G1);
__ movxtod(G1,F60);
__ xor3(L3,L5,G1);
@@ -3833,9 +4145,36 @@ class StubGenerator: public StubCodeGenerator {
__ fxor(FloatRegisterImpl::D, F56, F60, F60);
__ fxor(FloatRegisterImpl::D, F58, F62, F62);
+ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(to, 7, G1);
+ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_first_block);
+ __ delayed()->edge8n(to, G0, G2);
+
+ // aligned case: store output into the destination array
__ stf(FloatRegisterImpl::D, F60, to, 0);
__ stf(FloatRegisterImpl::D, F62, to, 8);
-
+ __ ba_short(L_check_decrypt_end);
+
+ __ BIND(L_store_misaligned_output_first_block);
+ __ add(to, 8, G3);
+ __ mov(8, G4);
+ __ sub(G4, G1, G4);
+ __ alignaddr(G4, G0, G4);
+ __ faligndata(F60, F60, F60);
+ __ faligndata(F62, F62, F62);
+ __ mov(to, G1);
+ __ and3(to, -8, to);
+ __ and3(G3, -8, G3);
+ __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY);
+ __ add(to, 8, to);
+ __ add(G3, 8, G3);
+ __ orn(G0, G2, G2);
+ __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY);
+ __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY);
+ __ mov(G1, to);
+
+ __ BIND(L_check_decrypt_end);
__ add(from, 16, from);
__ add(to, 16, to);
__ subcc(len_reg, 16, len_reg);
@@ -3852,17 +4191,44 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_dec_next2_blocks128);
__ nop();
- // F40:F42 used for first 16-bytes
+ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks128);
+ __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
+
+ // aligned case: load input into G4, G5, L4 and L5
__ ldx(from,0,G4);
__ ldx(from,8,G5);
+ __ ldx(from,16,L4);
+ __ ldx(from,24,L5);
+ __ ba_short(L_transform_next2_blocks128);
+
+ __ BIND(L_load_misaligned_next2_blocks128);
+ __ alignaddr(from, G0, from);
+ // F40, F42, F58, F60, F62 can be clobbered
+ __ ldf(FloatRegisterImpl::D, from, 0, F40);
+ __ ldf(FloatRegisterImpl::D, from, 8, F42);
+ __ ldf(FloatRegisterImpl::D, from, 16, F60);
+ __ ldf(FloatRegisterImpl::D, from, 24, F62);
+ __ ldf(FloatRegisterImpl::D, from, 32, F58);
+ __ faligndata(F40, F42, F40);
+ __ faligndata(F42, F60, F42);
+ __ faligndata(F60, F62, F60);
+ __ faligndata(F62, F58, F62);
+ __ movdtox(F40, G4);
+ __ movdtox(F42, G5);
+ __ movdtox(F60, L4);
+ __ movdtox(F62, L5);
+ __ mov(G1, from);
+
+ __ BIND(L_transform_next2_blocks128);
+ // F40:F42 used for first 16-bytes
__ xor3(L2,G4,G1);
__ movxtod(G1,F40);
__ xor3(L3,G5,G1);
__ movxtod(G1,F42);
// F60:F62 used for next 16-bytes
- __ ldx(from,16,L4);
- __ ldx(from,24,L5);
__ xor3(L2,L4,G1);
__ movxtod(G1,F60);
__ xor3(L3,L5,G1);
@@ -3891,9 +4257,6 @@ class StubGenerator: public StubCodeGenerator {
__ fxor(FloatRegisterImpl::D, F46, F40, F40);
__ fxor(FloatRegisterImpl::D, F44, F42, F42);
- __ stf(FloatRegisterImpl::D, F40, to, 0);
- __ stf(FloatRegisterImpl::D, F42, to, 8);
-
__ movxtod(G4,F56);
__ movxtod(G5,F58);
__ mov(L4,L0);
@@ -3901,32 +4264,93 @@ class StubGenerator: public StubCodeGenerator {
__ fxor(FloatRegisterImpl::D, F56, F60, F60);
__ fxor(FloatRegisterImpl::D, F58, F62, F62);
+ // For a mis-aligned store of the 32 bytes of result we do the following:
+ // circular right-shift all 4 FP registers so that the 'head' and 'tail'
+ // parts that need to be stored starting at the mis-aligned address end up in one FP reg;
+ // the other 3 FP regs can thus be stored using regular stores.
+ // We then use the edge + partial-store mechanism to store the 'head' and 'tail' parts.
+
+ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(to, 7, G1);
+ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks128);
+ __ delayed()->edge8n(to, G0, G2);
+
+ // aligned case: store output into the destination array
+ __ stf(FloatRegisterImpl::D, F40, to, 0);
+ __ stf(FloatRegisterImpl::D, F42, to, 8);
__ stf(FloatRegisterImpl::D, F60, to, 16);
__ stf(FloatRegisterImpl::D, F62, to, 24);
+ __ ba_short(L_check_decrypt_loop_end128);
+
+ __ BIND(L_store_misaligned_output_next2_blocks128);
+ __ mov(8, G4);
+ __ sub(G4, G1, G4);
+ __ alignaddr(G4, G0, G4);
+ __ faligndata(F40, F42, F56); // F56 can be clobbered
+ __ faligndata(F42, F60, F42);
+ __ faligndata(F60, F62, F60);
+ __ faligndata(F62, F40, F40);
+ __ mov(to, G1);
+ __ and3(to, -8, to);
+ __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY);
+ __ stf(FloatRegisterImpl::D, F56, to, 8);
+ __ stf(FloatRegisterImpl::D, F42, to, 16);
+ __ stf(FloatRegisterImpl::D, F60, to, 24);
+ __ add(to, 32, to);
+ __ orn(G0, G2, G2);
+ __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY);
+ __ mov(G1, to);
+ __ BIND(L_check_decrypt_loop_end128);
__ add(from, 32, from);
__ add(to, 32, to);
__ subcc(len_reg, 32, len_reg);
__ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128);
__ delayed()->nop();
- __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
- __ delayed()->nop();
+ __ ba_short(L_cbcdec_end);
__ align(OptoLoopAlignment);
__ BIND(L_dec_next2_blocks192);
__ nop();
- // F48:F50 used for first 16-bytes
+ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks192);
+ __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
+
+ // aligned case: load input into G4, G5, L4 and L5
__ ldx(from,0,G4);
__ ldx(from,8,G5);
+ __ ldx(from,16,L4);
+ __ ldx(from,24,L5);
+ __ ba_short(L_transform_next2_blocks192);
+
+ __ BIND(L_load_misaligned_next2_blocks192);
+ __ alignaddr(from, G0, from);
+ // F48, F50, F52, F60, F62 can be clobbered
+ __ ldf(FloatRegisterImpl::D, from, 0, F48);
+ __ ldf(FloatRegisterImpl::D, from, 8, F50);
+ __ ldf(FloatRegisterImpl::D, from, 16, F60);
+ __ ldf(FloatRegisterImpl::D, from, 24, F62);
+ __ ldf(FloatRegisterImpl::D, from, 32, F52);
+ __ faligndata(F48, F50, F48);
+ __ faligndata(F50, F60, F50);
+ __ faligndata(F60, F62, F60);
+ __ faligndata(F62, F52, F62);
+ __ movdtox(F48, G4);
+ __ movdtox(F50, G5);
+ __ movdtox(F60, L4);
+ __ movdtox(F62, L5);
+ __ mov(G1, from);
+
+ __ BIND(L_transform_next2_blocks192);
+ // F48:F50 used for first 16-bytes
__ xor3(L2,G4,G1);
__ movxtod(G1,F48);
__ xor3(L3,G5,G1);
__ movxtod(G1,F50);
// F60:F62 used for next 16-bytes
- __ ldx(from,16,L4);
- __ ldx(from,24,L5);
__ xor3(L2,L4,G1);
__ movxtod(G1,F60);
__ xor3(L3,L5,G1);
@@ -3955,9 +4379,6 @@ class StubGenerator: public StubCodeGenerator {
__ fxor(FloatRegisterImpl::D, F54, F48, F48);
__ fxor(FloatRegisterImpl::D, F52, F50, F50);
- __ stf(FloatRegisterImpl::D, F48, to, 0);
- __ stf(FloatRegisterImpl::D, F50, to, 8);
-
__ movxtod(G4,F56);
__ movxtod(G5,F58);
__ mov(L4,L0);
@@ -3965,32 +4386,87 @@ class StubGenerator: public StubCodeGenerator {
__ fxor(FloatRegisterImpl::D, F56, F60, F60);
__ fxor(FloatRegisterImpl::D, F58, F62, F62);
+ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(to, 7, G1);
+ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks192);
+ __ delayed()->edge8n(to, G0, G2);
+
+ // aligned case: store output into the destination array
+ __ stf(FloatRegisterImpl::D, F48, to, 0);
+ __ stf(FloatRegisterImpl::D, F50, to, 8);
__ stf(FloatRegisterImpl::D, F60, to, 16);
__ stf(FloatRegisterImpl::D, F62, to, 24);
+ __ ba_short(L_check_decrypt_loop_end192);
+
+ __ BIND(L_store_misaligned_output_next2_blocks192);
+ __ mov(8, G4);
+ __ sub(G4, G1, G4);
+ __ alignaddr(G4, G0, G4);
+ __ faligndata(F48, F50, F56); // F56 can be clobbered
+ __ faligndata(F50, F60, F50);
+ __ faligndata(F60, F62, F60);
+ __ faligndata(F62, F48, F48);
+ __ mov(to, G1);
+ __ and3(to, -8, to);
+ __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY);
+ __ stf(FloatRegisterImpl::D, F56, to, 8);
+ __ stf(FloatRegisterImpl::D, F50, to, 16);
+ __ stf(FloatRegisterImpl::D, F60, to, 24);
+ __ add(to, 32, to);
+ __ orn(G0, G2, G2);
+ __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY);
+ __ mov(G1, to);
+ __ BIND(L_check_decrypt_loop_end192);
__ add(from, 32, from);
__ add(to, 32, to);
__ subcc(len_reg, 32, len_reg);
__ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192);
__ delayed()->nop();
- __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
- __ delayed()->nop();
+ __ ba_short(L_cbcdec_end);
__ align(OptoLoopAlignment);
__ BIND(L_dec_next2_blocks256);
__ nop();
- // F0:F2 used for first 16-bytes
+ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(from, 7, G0);
+ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks256);
+ __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
+
+ // aligned case: load input into G4, G5, L4 and L5
__ ldx(from,0,G4);
__ ldx(from,8,G5);
+ __ ldx(from,16,L4);
+ __ ldx(from,24,L5);
+ __ ba_short(L_transform_next2_blocks256);
+
+ __ BIND(L_load_misaligned_next2_blocks256);
+ __ alignaddr(from, G0, from);
+ // F0, F2, F4, F60, F62 can be clobbered
+ __ ldf(FloatRegisterImpl::D, from, 0, F0);
+ __ ldf(FloatRegisterImpl::D, from, 8, F2);
+ __ ldf(FloatRegisterImpl::D, from, 16, F60);
+ __ ldf(FloatRegisterImpl::D, from, 24, F62);
+ __ ldf(FloatRegisterImpl::D, from, 32, F4);
+ __ faligndata(F0, F2, F0);
+ __ faligndata(F2, F60, F2);
+ __ faligndata(F60, F62, F60);
+ __ faligndata(F62, F4, F62);
+ __ movdtox(F0, G4);
+ __ movdtox(F2, G5);
+ __ movdtox(F60, L4);
+ __ movdtox(F62, L5);
+ __ mov(G1, from);
+
+ __ BIND(L_transform_next2_blocks256);
+ // F0:F2 used for first 16-bytes
__ xor3(L2,G4,G1);
__ movxtod(G1,F0);
__ xor3(L3,G5,G1);
__ movxtod(G1,F2);
// F60:F62 used for next 16-bytes
- __ ldx(from,16,L4);
- __ ldx(from,24,L5);
__ xor3(L2,L4,G1);
__ movxtod(G1,F60);
__ xor3(L3,L5,G1);
@@ -4043,9 +4519,6 @@ class StubGenerator: public StubCodeGenerator {
__ fxor(FloatRegisterImpl::D, F6, F0, F0);
__ fxor(FloatRegisterImpl::D, F4, F2, F2);
- __ stf(FloatRegisterImpl::D, F0, to, 0);
- __ stf(FloatRegisterImpl::D, F2, to, 8);
-
__ movxtod(G4,F56);
__ movxtod(G5,F58);
__ mov(L4,L0);
@@ -4053,9 +4526,38 @@ class StubGenerator: public StubCodeGenerator {
__ fxor(FloatRegisterImpl::D, F56, F60, F60);
__ fxor(FloatRegisterImpl::D, F58, F62, F62);
+ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
+ __ andcc(to, 7, G1);
+ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks256);
+ __ delayed()->edge8n(to, G0, G2);
+
+ // aligned case: store output into the destination array
+ __ stf(FloatRegisterImpl::D, F0, to, 0);
+ __ stf(FloatRegisterImpl::D, F2, to, 8);
__ stf(FloatRegisterImpl::D, F60, to, 16);
__ stf(FloatRegisterImpl::D, F62, to, 24);
+ __ ba_short(L_check_decrypt_loop_end256);
+
+ __ BIND(L_store_misaligned_output_next2_blocks256);
+ __ mov(8, G4);
+ __ sub(G4, G1, G4);
+ __ alignaddr(G4, G0, G4);
+ __ faligndata(F0, F2, F56); // F56 can be clobbered
+ __ faligndata(F2, F60, F2);
+ __ faligndata(F60, F62, F60);
+ __ faligndata(F62, F0, F0);
+ __ mov(to, G1);
+ __ and3(to, -8, to);
+ __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY);
+ __ stf(FloatRegisterImpl::D, F56, to, 8);
+ __ stf(FloatRegisterImpl::D, F2, to, 16);
+ __ stf(FloatRegisterImpl::D, F60, to, 24);
+ __ add(to, 32, to);
+ __ orn(G0, G2, G2);
+ __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY);
+ __ mov(G1, to);
+ __ BIND(L_check_decrypt_loop_end256);
__ add(from, 32, from);
__ add(to, 32, to);
__ subcc(len_reg, 32, len_reg);
@@ -4063,6 +4565,7 @@ class StubGenerator: public StubCodeGenerator {
__ delayed()->nop();
__ BIND(L_cbcdec_end);
+ // re-init initial vector for next block, 8-byte alignment is guaranteed
__ stx(L0, rvec, 0);
__ stx(L1, rvec, 8);
__ restore();
diff --git a/src/cpu/sparc/vm/stubRoutines_sparc.hpp b/src/cpu/sparc/vm/stubRoutines_sparc.hpp
index a94f5977f..880a02619 100644
--- a/src/cpu/sparc/vm/stubRoutines_sparc.hpp
+++ b/src/cpu/sparc/vm/stubRoutines_sparc.hpp
@@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) {
enum /* platform_dependent_constants */ {
// %%%%%%%% May be able to shrink this a lot
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
- code_size2 = 20000 // simply increase if too small (assembler will crash if too small)
+ code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
};
class Sparc {
diff --git a/src/cpu/sparc/vm/vm_version_sparc.cpp b/src/cpu/sparc/vm/vm_version_sparc.cpp
index b75d21f98..129bcd8b6 100644
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -266,9 +266,9 @@ void VM_Version::initialize() {
if (!has_vis1()) // Drop to 0 if no VIS1 support
UseVIS = 0;
- // T2 and above should have support for AES instructions
+ // SPARC T4 and above should have support for AES instructions
if (has_aes()) {
- if (UseVIS > 0) { // AES intrinsics use FXOR instruction which is VIS1
+ if (UseVIS > 2) { // AES intrinsics use MOVxTOd/MOVdTOx which are VIS3
if (FLAG_IS_DEFAULT(UseAES)) {
FLAG_SET_DEFAULT(UseAES, true);
}
@@ -282,7 +282,7 @@ void VM_Version::initialize() {
}
} else {
if (UseAES || UseAESIntrinsics) {
- warning("SPARC AES intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
+ warning("SPARC AES intrinsics require VIS3 instruction support. Intrinsics will be disabled.");
if (UseAES) {
FLAG_SET_DEFAULT(UseAES, false);
}
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index 2fc29eae2..a89e50b65 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -1766,7 +1766,7 @@ void Assembler::movdqu(Address dst, XMMRegister src) {
// Move Unaligned 256bit Vector
void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
- assert(UseAVX, "");
+ assert(UseAVX > 0, "");
bool vector256 = true;
int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
emit_int8(0x6F);
@@ -1774,7 +1774,7 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
}
void Assembler::vmovdqu(XMMRegister dst, Address src) {
- assert(UseAVX, "");
+ assert(UseAVX > 0, "");
InstructionMark im(this);
bool vector256 = true;
vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
@@ -1783,7 +1783,7 @@ void Assembler::vmovdqu(XMMRegister dst, Address src) {
}
void Assembler::vmovdqu(Address dst, XMMRegister src) {
- assert(UseAVX, "");
+ assert(UseAVX > 0, "");
InstructionMark im(this);
bool vector256 = true;
// swap src<->dst for encoding
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp
index ba5fcb383..1fc0e614b 100644
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -263,6 +263,10 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
// and check upper YMM bits after it.
//
VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
+ intx saved_useavx = UseAVX;
+ intx saved_usesse = UseSSE;
+ UseAVX = 1;
+ UseSSE = 2;
// load value into all 32 bytes of ymm7 register
__ movl(rcx, VM_Version::ymm_test_value());
@@ -292,6 +296,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
#endif
VM_Version::clean_cpuFeatures();
+ UseAVX = saved_useavx;
+ UseSSE = saved_usesse;
//
// cpuid(0x7) Structured Extended Features
diff --git a/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp b/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp
index d9a804896..07b212bae 100644
--- a/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp
+++ b/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp
@@ -53,41 +53,41 @@ inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *
inline jlong Atomic::load(volatile jlong* src) { return *src; }
-/*
- machine barrier instructions:
-
- - sync two-way memory barrier, aka fence
- - lwsync orders Store|Store,
- Load|Store,
- Load|Load,
- but not Store|Load
- - eieio orders memory accesses for device memory (only)
- - isync invalidates speculatively executed instructions
- From the POWER ISA 2.06 documentation:
- "[...] an isync instruction prevents the execution of
- instructions following the isync until instructions
- preceding the isync have completed, [...]"
- From IBM's AIX assembler reference:
- "The isync [...] instructions causes the processor to
- refetch any instructions that might have been fetched
- prior to the isync instruction. The instruction isync
- causes the processor to wait for all previous instructions
- to complete. Then any instructions already fetched are
- discarded and instruction processing continues in the
- environment established by the previous instructions."
-
- semantic barrier instructions:
- (as defined in orderAccess.hpp)
-
- - release orders Store|Store, (maps to lwsync)
- Load|Store
- - acquire orders Load|Store, (maps to lwsync)
- Load|Load
- - fence orders Store|Store, (maps to sync)
- Load|Store,
- Load|Load,
- Store|Load
-*/
+//
+// machine barrier instructions:
+//
+// - sync two-way memory barrier, aka fence
+// - lwsync orders Store|Store,
+// Load|Store,
+// Load|Load,
+// but not Store|Load
+// - eieio orders memory accesses for device memory (only)
+// - isync invalidates speculatively executed instructions
+// From the POWER ISA 2.06 documentation:
+// "[...] an isync instruction prevents the execution of
+// instructions following the isync until instructions
+// preceding the isync have completed, [...]"
+// From IBM's AIX assembler reference:
+// "The isync [...] instructions causes the processor to
+// refetch any instructions that might have been fetched
+// prior to the isync instruction. The instruction isync
+// causes the processor to wait for all previous instructions
+// to complete. Then any instructions already fetched are
+// discarded and instruction processing continues in the
+// environment established by the previous instructions."
+//
+// semantic barrier instructions:
+// (as defined in orderAccess.hpp)
+//
+// - release orders Store|Store, (maps to lwsync)
+// Load|Store
+// - acquire orders Load|Store, (maps to lwsync)
+// Load|Load
+// - fence orders Store|Store, (maps to sync)
+// Load|Store,
+// Load|Load,
+// Store|Load
+//
#define strasm_sync "\n sync \n"
#define strasm_lwsync "\n lwsync \n"
diff --git a/src/share/vm/ci/ciReplay.cpp b/src/share/vm/ci/ciReplay.cpp
index e88081509..6acde213d 100644
--- a/src/share/vm/ci/ciReplay.cpp
+++ b/src/share/vm/ci/ciReplay.cpp
@@ -376,11 +376,15 @@ class CompileReplay : public StackObj {
int c = getc(_stream);
while(c != EOF) {
c = get_line(c);
- process_command(CHECK);
+ process_command(THREAD);
if (had_error()) {
tty->print_cr("Error while parsing line %d: %s\n", line_no, _error_message);
- tty->print_cr("%s", _buffer);
- return;
+ if (ReplayIgnoreInitErrors) {
+ CLEAR_PENDING_EXCEPTION;
+ _error_message = NULL;
+ } else {
+ return;
+ }
}
line_no++;
}
@@ -565,10 +569,14 @@ class CompileReplay : public StackObj {
void process_ciMethodData(TRAPS) {
Method* method = parse_method(CHECK);
if (had_error()) return;
- /* jsut copied from Method, to build interpret data*/
+ /* just copied from Method, to build interpret data*/
if (InstanceRefKlass::owns_pending_list_lock((JavaThread*)THREAD)) {
return;
}
+ // To be properly initialized, some profiling in the MDO needs the
+ // method to be rewritten (number of arguments at a call for
+ // instance)
+ method->method_holder()->link_class(CHECK);
// methodOopDesc::build_interpreter_method_data(method, CHECK);
{
// Grab a lock here to prevent multiple
diff --git a/src/share/vm/classfile/vmSymbols.hpp b/src/share/vm/classfile/vmSymbols.hpp
index ed3c0dbcb..f923c7ca4 100644
--- a/src/share/vm/classfile/vmSymbols.hpp
+++ b/src/share/vm/classfile/vmSymbols.hpp
@@ -774,7 +774,7 @@
/* java/lang/ref/Reference */ \
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
\
- /* support for com.sum.crypto.provider.AESCrypt and some of its callers */ \
+ /* support for com.sun.crypto.provider.AESCrypt and some of its callers */ \
do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \
do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
diff --git a/src/share/vm/code/nmethod.cpp b/src/share/vm/code/nmethod.cpp
index 249872d44..8d7ae402c 100644
--- a/src/share/vm/code/nmethod.cpp
+++ b/src/share/vm/code/nmethod.cpp
@@ -771,7 +771,11 @@ nmethod::nmethod(
_hotness_counter = NMethodSweeper::hotness_counter_reset_val();
code_buffer->copy_values_to(this);
- debug_only(verify_scavenge_root_oops());
+ if (ScavengeRootsInCode && detect_scavenge_root_oops()) {
+ CodeCache::add_scavenge_root_nmethod(this);
+ Universe::heap()->register_nmethod(this);
+ }
+ DEBUG_ONLY(verify_scavenge_root_oops();)
CodeCache::commit(this);
}
diff --git a/src/share/vm/oops/klass.cpp b/src/share/vm/oops/klass.cpp
index 2e8ef6f8f..007bb1e81 100644
--- a/src/share/vm/oops/klass.cpp
+++ b/src/share/vm/oops/klass.cpp
@@ -496,6 +496,7 @@ void Klass::remove_unshareable_info() {
}
void Klass::restore_unshareable_info(TRAPS) {
+ TRACE_INIT_ID(this);
// If an exception happened during CDS restore, some of these fields may already be
// set. We leave the class on the CLD list, even if incomplete so that we don't
// modify the CLD list outside a safepoint.
diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp
index 23ecdfc11..651adc137 100644
--- a/src/share/vm/opto/compile.cpp
+++ b/src/share/vm/opto/compile.cpp
@@ -693,6 +693,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
#endif
set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));
+ set_has_irreducible_loop(true); // conservative until build_loop_tree() resets it
if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
// Make sure the method being compiled gets its own MDO,
@@ -977,6 +978,8 @@ Compile::Compile( ciEnv* ci_env,
set_print_assembly(PrintFrameConverterAssembly);
set_parsed_irreducible_loop(false);
#endif
+ set_has_irreducible_loop(false); // no loops
+
CompileWrapper cw(this);
Init(/*AliasLevel=*/ 0);
init_tf((*generator)());
@@ -1147,7 +1150,7 @@ StartNode* Compile::start() const {
if( start->is_Start() )
return start->as_Start();
}
- ShouldNotReachHere();
+ fatal("Did not find Start node!");
return NULL;
}
diff --git a/src/share/vm/opto/compile.hpp b/src/share/vm/opto/compile.hpp
index b9f48c494..a496c8825 100644
--- a/src/share/vm/opto/compile.hpp
+++ b/src/share/vm/opto/compile.hpp
@@ -319,6 +319,7 @@ class Compile : public Phase {
bool _trace_opto_output;
bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
#endif
+ bool _has_irreducible_loop; // Found irreducible loops
// JSR 292
bool _has_method_handle_invokes; // True if this method has MethodHandle invokes.
RTMState _rtm_state; // State of Restricted Transactional Memory usage
@@ -605,6 +606,8 @@ class Compile : public Phase {
void set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; }
int _in_dump_cnt; // Required for dumping ir nodes.
#endif
+ bool has_irreducible_loop() const { return _has_irreducible_loop; }
+ void set_has_irreducible_loop(bool z) { _has_irreducible_loop = z; }
// JSR 292
bool has_method_handle_invokes() const { return _has_method_handle_invokes; }
diff --git a/src/share/vm/opto/loopnode.cpp b/src/share/vm/opto/loopnode.cpp
index 4f11936ec..093fcf81b 100644
--- a/src/share/vm/opto/loopnode.cpp
+++ b/src/share/vm/opto/loopnode.cpp
@@ -266,9 +266,9 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
// Counted loop head must be a good RegionNode with only 3 not NULL
// control input edges: Self, Entry, LoopBack.
- if (x->in(LoopNode::Self) == NULL || x->req() != 3)
+ if (x->in(LoopNode::Self) == NULL || x->req() != 3 || loop->_irreducible) {
return false;
-
+ }
Node *init_control = x->in(LoopNode::EntryControl);
Node *back_control = x->in(LoopNode::LoopBackControl);
if (init_control == NULL || back_control == NULL) // Partially dead
@@ -1522,11 +1522,11 @@ bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) {
// If I have one hot backedge, peel off myself loop.
// I better be the outermost loop.
- if( _head->req() > 3 ) {
+ if (_head->req() > 3 && !_irreducible) {
split_outer_loop( phase );
result = true;
- } else if( !_head->is_Loop() && !_irreducible ) {
+ } else if (!_head->is_Loop() && !_irreducible) {
// Make a new LoopNode to replace the old loop head
Node *l = new (phase->C) LoopNode( _head->in(1), _head->in(2) );
l = igvn.register_new_node_with_optimizer(l, _head);
@@ -2938,6 +2938,7 @@ int PhaseIdealLoop::build_loop_tree_impl( Node *n, int pre_order ) {
return pre_order;
}
}
+ C->set_has_irreducible_loop(_has_irreducible_loops);
}
// This Node might be a decision point for loops. It is only if
diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp
index 3a6d4998b..7330f77ff 100644
--- a/src/share/vm/opto/memnode.cpp
+++ b/src/share/vm/opto/memnode.cpp
@@ -306,33 +306,16 @@ Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) {
int alias_idx = phase->C->get_alias_index(t_adr->is_ptr());
}
-#ifdef ASSERT
Node* base = NULL;
- if (address->is_AddP())
+ if (address->is_AddP()) {
base = address->in(AddPNode::Base);
+ }
if (base != NULL && phase->type(base)->higher_equal(TypePtr::NULL_PTR) &&
!t_adr->isa_rawptr()) {
// Note: raw address has TOP base and top->higher_equal(TypePtr::NULL_PTR) is true.
- Compile* C = phase->C;
- tty->cr();
- tty->print_cr("===== NULL+offs not RAW address =====");
- if (C->is_dead_node(this->_idx)) tty->print_cr("'this' is dead");
- if ((ctl != NULL) && C->is_dead_node(ctl->_idx)) tty->print_cr("'ctl' is dead");
- if (C->is_dead_node(mem->_idx)) tty->print_cr("'mem' is dead");
- if (C->is_dead_node(address->_idx)) tty->print_cr("'address' is dead");
- if (C->is_dead_node(base->_idx)) tty->print_cr("'base' is dead");
- tty->cr();
- base->dump(1);
- tty->cr();
- this->dump(2);
- tty->print("this->adr_type(): "); adr_type()->dump(); tty->cr();
- tty->print("phase->type(address): "); t_adr->dump(); tty->cr();
- tty->print("phase->type(base): "); phase->type(address)->dump(); tty->cr();
- tty->cr();
- }
- assert(base == NULL || t_adr->isa_rawptr() ||
- !phase->type(base)->higher_equal(TypePtr::NULL_PTR), "NULL+offs not RAW address?");
-#endif
+ // Skip this node optimization if its address has TOP base.
+ return NodeSentinel; // caller will return NULL
+ }
// Avoid independent memory operations
Node* old_mem = mem;
diff --git a/src/share/vm/opto/node.cpp b/src/share/vm/opto/node.cpp
index 2ba71f6bc..f110b0e23 100644
--- a/src/share/vm/opto/node.cpp
+++ b/src/share/vm/opto/node.cpp
@@ -27,6 +27,7 @@
#include "memory/allocation.inline.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
+#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/node.hpp"
@@ -1255,6 +1256,7 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
Node *top = igvn->C->top();
nstack.push(dead);
+ bool has_irreducible_loop = igvn->C->has_irreducible_loop();
while (nstack.size() > 0) {
dead = nstack.pop();
@@ -1269,13 +1271,31 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
assert (!use->is_Con(), "Control for Con node should be Root node.");
use->set_req(0, top); // Cut dead edge to prevent processing
nstack.push(use); // the dead node again.
+ } else if (!has_irreducible_loop && // Backedge could be alive in irreducible loop
+ use->is_Loop() && !use->is_Root() && // Don't kill Root (RootNode extends LoopNode)
+ use->in(LoopNode::EntryControl) == dead) { // Dead loop if its entry is dead
+ use->set_req(LoopNode::EntryControl, top); // Cut dead edge to prevent processing
+ use->set_req(0, top); // Cut self edge
+ nstack.push(use);
} else { // Else found a not-dead user
+ // Dead if all inputs are top or null
+ bool dead_use = !use->is_Root(); // Keep empty graph alive
for (uint j = 1; j < use->req(); j++) {
- if (use->in(j) == dead) { // Turn all dead inputs into TOP
+ Node* in = use->in(j);
+ if (in == dead) { // Turn all dead inputs into TOP
use->set_req(j, top);
+ } else if (in != NULL && !in->is_top()) {
+ dead_use = false;
}
}
- igvn->_worklist.push(use);
+ if (dead_use) {
+ if (use->is_Region()) {
+ use->set_req(0, top); // Cut self edge
+ }
+ nstack.push(use);
+ } else {
+ igvn->_worklist.push(use);
+ }
}
// Refresh the iterator, since any number of kills might have happened.
k = dead->last_outs(kmin);
diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp
index b6fb87df5..1f5c964fc 100644
--- a/src/share/vm/opto/runtime.cpp
+++ b/src/share/vm/opto/runtime.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -870,7 +870,7 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
return TypeFunc::make(domain, range);
}
-// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning void
+// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
// create input type (domain)
int num_args = 5;
diff --git a/src/share/vm/runtime/advancedThresholdPolicy.cpp b/src/share/vm/runtime/advancedThresholdPolicy.cpp
index 7d72ca7f3..1680c48e9 100644
--- a/src/share/vm/runtime/advancedThresholdPolicy.cpp
+++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp
@@ -53,7 +53,8 @@ void AdvancedThresholdPolicy::initialize() {
}
set_c1_count(MAX2(count / 3, 1));
- set_c2_count(MAX2(count - count / 3, 1));
+ set_c2_count(MAX2(count - c1_count(), 1));
+ FLAG_SET_ERGO(intx, CICompilerCount, c1_count() + c2_count());
// Some inlining tuning
#ifdef X86
diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp
index 7441f2610..b859c15af 100644
--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -2383,6 +2383,10 @@ bool Arguments::check_vm_args_consistency() {
status &= verify_interval(NmethodSweepFraction, 1, ReservedCodeCacheSize/K, "NmethodSweepFraction");
status &= verify_interval(NmethodSweepActivity, 0, 2000, "NmethodSweepActivity");
+ if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) {
+ warning("The VM option CICompilerCountPerCPU overrides CICompilerCount.");
+ }
+
return status;
}
diff --git a/src/share/vm/runtime/compilationPolicy.cpp b/src/share/vm/runtime/compilationPolicy.cpp
index 23fbc87f7..3431f3cc3 100644
--- a/src/share/vm/runtime/compilationPolicy.cpp
+++ b/src/share/vm/runtime/compilationPolicy.cpp
@@ -182,6 +182,7 @@ void NonTieredCompPolicy::initialize() {
// max(log2(8)-1,1) = 2 compiler threads on an 8-way machine.
// May help big-app startup time.
_compiler_count = MAX2(log2_intptr(os::active_processor_count())-1,1);
+ FLAG_SET_ERGO(intx, CICompilerCount, _compiler_count);
} else {
_compiler_count = CICompilerCount;
}
diff --git a/src/share/vm/runtime/sharedRuntime.cpp b/src/share/vm/runtime/sharedRuntime.cpp
index 0b3d62893..b41df9f48 100644
--- a/src/share/vm/runtime/sharedRuntime.cpp
+++ b/src/share/vm/runtime/sharedRuntime.cpp
@@ -2690,19 +2690,20 @@ JRT_ENTRY_NO_ASYNC(void, SharedRuntime::block_for_jni_critical(JavaThread* threa
JRT_END
#ifdef HAVE_DTRACE_H
-// Create a dtrace nmethod for this method. The wrapper converts the
-// java compiled calling convention to the native convention, makes a dummy call
-// (actually nops for the size of the call instruction, which become a trap if
-// probe is enabled). The returns to the caller. Since this all looks like a
-// leaf no thread transition is needed.
-
+/**
+ * Create a dtrace nmethod for this method. The wrapper converts the
+ * Java-compiled calling convention to the native convention, makes a dummy call
+ * (actually nops for the size of the call instruction, which become a trap if
+ * probe is enabled), and finally returns to the caller. Since this all looks like a
+ * leaf, no thread transition is needed.
+ */
nmethod *AdapterHandlerLibrary::create_dtrace_nmethod(methodHandle method) {
ResourceMark rm;
nmethod* nm = NULL;
if (PrintCompilation) {
ttyLocker ttyl;
- tty->print("--- n%s ");
+ tty->print("--- n ");
method->print_short_name(tty);
if (method->is_static()) {
tty->print(" (static)");
diff --git a/src/share/vm/runtime/simpleThresholdPolicy.cpp b/src/share/vm/runtime/simpleThresholdPolicy.cpp
index ad4c27cf2..a14a67b43 100644
--- a/src/share/vm/runtime/simpleThresholdPolicy.cpp
+++ b/src/share/vm/runtime/simpleThresholdPolicy.cpp
@@ -142,7 +142,8 @@ void SimpleThresholdPolicy::initialize() {
count = MAX2(log2_intptr(os::active_processor_count()), 1) * 3 / 2;
}
set_c1_count(MAX2(count / 3, 1));
- set_c2_count(MAX2(count - count / 3, 1));
+ set_c2_count(MAX2(count - c1_count(), 1));
+ FLAG_SET_ERGO(intx, CICompilerCount, c1_count() + c2_count());
}
void SimpleThresholdPolicy::set_carry_if_necessary(InvocationCounter *counter) {
@@ -191,6 +192,10 @@ nmethod* SimpleThresholdPolicy::event(methodHandle method, methodHandle inlinee,
thread->is_interp_only_mode()) {
return NULL;
}
+ if (CompileTheWorld || ReplayCompiles) {
+ // Don't trigger other compiles in testing mode
+ return NULL;
+ }
nmethod *osr_nm = NULL;
handle_counter_overflow(method());
diff --git a/test/compiler/7184394/TestAESBase.java b/test/compiler/7184394/TestAESBase.java
index 511b97dc6..4d3204880 100644
--- a/test/compiler/7184394/TestAESBase.java
+++ b/test/compiler/7184394/TestAESBase.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -40,9 +40,20 @@ abstract public class TestAESBase {
int msgSize = Integer.getInteger("msgSize", 646);
boolean checkOutput = Boolean.getBoolean("checkOutput");
boolean noReinit = Boolean.getBoolean("noReinit");
+ boolean testingMisalignment;
+ private static final int ALIGN = 8;
+ int encInputOffset = Integer.getInteger("encInputOffset", 0) % ALIGN;
+ int encOutputOffset = Integer.getInteger("encOutputOffset", 0) % ALIGN;
+ int decOutputOffset = Integer.getInteger("decOutputOffset", 0) % ALIGN;
+ int lastChunkSize = Integer.getInteger("lastChunkSize", 32);
int keySize = Integer.getInteger("keySize", 128);
+ int inputLength;
+ int encodeLength;
+ int decodeLength;
+ int decodeMsgSize;
String algorithm = System.getProperty("algorithm", "AES");
String mode = System.getProperty("mode", "CBC");
+ String paddingStr = System.getProperty("paddingStr", "PKCS5Padding");
byte[] input;
byte[] encode;
byte[] expectedEncode;
@@ -51,7 +62,6 @@ abstract public class TestAESBase {
Random random = new Random(0);
Cipher cipher;
Cipher dCipher;
- String paddingStr = "PKCS5Padding";
AlgorithmParameters algParams;
SecretKey key;
@@ -67,7 +77,10 @@ abstract public class TestAESBase {
public void prepare() {
try {
- System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput);
+ System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", paddingStr=" + paddingStr + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput + ", encInputOffset=" + encInputOffset + ", encOutputOffset=" + encOutputOffset + ", decOutputOffset=" + decOutputOffset + ", lastChunkSize=" +lastChunkSize );
+
+ if (encInputOffset % ALIGN != 0 || encOutputOffset % ALIGN != 0 || decOutputOffset % ALIGN !=0 )
+ testingMisalignment = true;
int keyLenBytes = (keySize == 0 ? 16 : keySize/8);
byte keyBytes[] = new byte[keyLenBytes];
@@ -81,10 +94,6 @@ abstract public class TestAESBase {
System.out.println("Algorithm: " + key.getAlgorithm() + "("
+ key.getEncoded().length * 8 + "bit)");
}
- input = new byte[msgSize];
- for (int i=0; i<input.length; i++) {
- input[i] = (byte) (i & 0xff);
- }
cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
@@ -103,10 +112,35 @@ abstract public class TestAESBase {
childShowCipher();
}
+ inputLength = msgSize + encInputOffset;
+ if (testingMisalignment) {
+ encodeLength = cipher.getOutputSize(msgSize - lastChunkSize) + encOutputOffset;
+ encodeLength += cipher.getOutputSize(lastChunkSize);
+ decodeLength = dCipher.getOutputSize(encodeLength - lastChunkSize) + decOutputOffset;
+ decodeLength += dCipher.getOutputSize(lastChunkSize);
+ } else {
+ encodeLength = cipher.getOutputSize(msgSize) + encOutputOffset;
+ decodeLength = dCipher.getOutputSize(encodeLength) + decOutputOffset;
+ }
+
+ input = new byte[inputLength];
+ for (int i=encInputOffset, j=0; i<inputLength; i++, j++) {
+ input[i] = (byte) (j & 0xff);
+ }
+
// do one encode and decode in preparation
- // this will also create the encode buffer and decode buffer
- encode = cipher.doFinal(input);
- decode = dCipher.doFinal(encode);
+ encode = new byte[encodeLength];
+ decode = new byte[decodeLength];
+ if (testingMisalignment) {
+ decodeMsgSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset);
+ decodeMsgSize += cipher.doFinal(input, (encInputOffset + msgSize - lastChunkSize), lastChunkSize, encode, (encOutputOffset + decodeMsgSize));
+
+ int tempSize = dCipher.update(encode, encOutputOffset, (decodeMsgSize - lastChunkSize), decode, decOutputOffset);
+ dCipher.doFinal(encode, (encOutputOffset + decodeMsgSize - lastChunkSize), lastChunkSize, decode, (decOutputOffset + tempSize));
+ } else {
+ decodeMsgSize = cipher.doFinal(input, encInputOffset, msgSize, encode, encOutputOffset);
+ dCipher.doFinal(encode, encOutputOffset, decodeMsgSize, decode, decOutputOffset);
+ }
if (checkOutput) {
expectedEncode = (byte[]) encode.clone();
expectedDecode = (byte[]) decode.clone();
diff --git a/test/compiler/7184394/TestAESDecode.java b/test/compiler/7184394/TestAESDecode.java
index f9ec02d15..21f1f5559 100644
--- a/test/compiler/7184394/TestAESDecode.java
+++ b/test/compiler/7184394/TestAESDecode.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,14 +33,15 @@ public class TestAESDecode extends TestAESBase {
public void run() {
try {
if (!noReinit) dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
+ decode = new byte[decodeLength];
+ if (testingMisalignment) {
+ int tempSize = dCipher.update(encode, encOutputOffset, (decodeMsgSize - lastChunkSize), decode, decOutputOffset);
+ dCipher.doFinal(encode, (encOutputOffset + decodeMsgSize - lastChunkSize), lastChunkSize, decode, (decOutputOffset + tempSize));
+ } else {
+ dCipher.doFinal(encode, encOutputOffset, decodeMsgSize, decode, decOutputOffset);
+ }
if (checkOutput) {
- // checked version creates new output buffer each time
- decode = dCipher.doFinal(encode, 0, encode.length);
compareArrays(decode, expectedDecode);
- } else {
- // non-checked version outputs to existing encode buffer for maximum speed
- decode = new byte[dCipher.getOutputSize(encode.length)];
- dCipher.doFinal(encode, 0, encode.length, decode);
}
}
catch (Exception e) {
diff --git a/test/compiler/7184394/TestAESEncode.java b/test/compiler/7184394/TestAESEncode.java
index 1d6bf7fbd..f1a35bde0 100644
--- a/test/compiler/7184394/TestAESEncode.java
+++ b/test/compiler/7184394/TestAESEncode.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,14 +33,15 @@ public class TestAESEncode extends TestAESBase {
public void run() {
try {
if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
+ encode = new byte[encodeLength];
+ if (testingMisalignment) {
+ int tempSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset);
+ cipher.doFinal(input, (encInputOffset + msgSize - lastChunkSize), lastChunkSize, encode, (encOutputOffset + tempSize));
+ } else {
+ cipher.doFinal(input, encInputOffset, msgSize, encode, encOutputOffset);
+ }
if (checkOutput) {
- // checked version creates new output buffer each time
- encode = cipher.doFinal(input, 0, msgSize);
compareArrays(encode, expectedEncode);
- } else {
- // non-checked version outputs to existing encode buffer for maximum speed
- encode = new byte[cipher.getOutputSize(msgSize)];
- cipher.doFinal(input, 0, msgSize, encode);
}
}
catch (Exception e) {
diff --git a/test/compiler/7184394/TestAESMain.java b/test/compiler/7184394/TestAESMain.java
index ff9e12bcd..20929e8ba 100644
--- a/test/compiler/7184394/TestAESMain.java
+++ b/test/compiler/7184394/TestAESMain.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2014 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,7 +28,19 @@
* @summary add intrinsics to use AES instructions
*
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencInputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DdecOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DdecOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
*
* @author Tom Deneau
*/
@@ -36,12 +48,13 @@
public class TestAESMain {
public static void main(String[] args) {
int iters = (args.length > 0 ? Integer.valueOf(args[0]) : 1000000);
+ int warmupIters = (args.length > 1 ? Integer.valueOf(args[1]) : 20000);
System.out.println(iters + " iterations");
TestAESEncode etest = new TestAESEncode();
etest.prepare();
- // warm-up for 20K iterations
+ // warm-up
System.out.println("Starting encryption warm-up");
- for (int i=0; i<20000; i++) {
+ for (int i=0; i<warmupIters; i++) {
etest.run();
}
System.out.println("Finished encryption warm-up");
@@ -54,9 +67,9 @@ public class TestAESMain {
TestAESDecode dtest = new TestAESDecode();
dtest.prepare();
- // warm-up for 20K iterations
+ // warm-up
System.out.println("Starting decryption warm-up");
- for (int i=0; i<20000; i++) {
+ for (int i=0; i<warmupIters; i++) {
dtest.run();
}
System.out.println("Finished decryption warm-up");