我们知道在Android虚拟机中,虚拟机将dex文件编译成机器码的过程中,首先会将dex文件转化为IR,然后再将IR进行优化后,翻译成相应的机器码。其中IR全称是 intermediate representation,即中间表示,是编译器领域的通用概念(LLVM IR只是其中最著名的一种实现,并非LLVM独创)。经典的compiler架构由前端frontend(读入源代码, 通过词法, 语法与语义分析建立AST), 中端optimizer(优化模块)与backend(通过指令选择, 寄存器分配等阶段最终输出为目标架构的汇编)组成。
HBoundsCheckElimination
今天我们讲一下IR中的一种优化方法,HBoundsCheckElimination,即消除HBoundsCheck。根据Java语言规范要求,每次访问数组元素的时候,都要检查索引是否越界,如果越界的话,则会抛出ArrayIndexOutOfBoundsException异常,这个异常,想必大家在开发中一定遇到过。
ART在编译aget,aput(数组读/写指令)为IR的HArrayGet或者HArraySet的时候,同时还会生成一条HBoundsCheck IR,用于检查索引是否越界。在代码中,我们经常通过循环遍历数组,HBoundsCheck也会执行很多遍,这对程序运行是一个很大的浪费,HBoundsCheckElimination就是用于消除不必要的HBoundsCheck IR。如果本身我们的循环中最大的索引值不可能超过数组元素个数的话,那么就可以完全去掉HBoundsCheck。
// The loop bound (array.length - 2) is provably smaller than array.length,
// so every access inside the loop is in range and BCE can drop HBoundsCheck.
int []array = new int []{
    1, 2, 3, 4, 5
};
for (int i = 0; i < array.length - 2; i++) {
    // do something
}
这种,遍历永远小于array的length,那么HBoundsCheck的指令就可以去掉
一般而言,我们希望Java字节码被编译成机器码,这样能提高程序的运行速度,但是有时候我们不得不放弃机器码,而退回到解释器模式来执行。比如对于刚才说的ArrayIndexOutOfBoundsException,遇到这种异常必须要抛出:下面程序我们循环了11次,索引从0开始每次增加1,但是array的长度只有5,所以从第6次,也就是i=5的时候就会触发异常。即使明确知道该循环最大索引超过了数组长度,我们也不能在循环开始前就抛出它,而必须等到第6次执行的时候才可以抛出。遇到这种情况的话,我们需要使用解释器模式来执行抛出的操作,所以会涉及到从机器码跳回到解释器模式。Android虚拟机考虑到了这种情况,设计了一条名为HDeoptimize的指令,用于反优化等相关操作。
// array.length == 5 but the loop runs i = 0..10, so array[i] throws
// ArrayIndexOutOfBoundsException on the 6th iteration (i == 5).
int []array = new int []{
    1, 2, 3, 4, 5
};
for (int i = 0; i < 11; i++) {
    int value = array[i];
}
在art/compiler/optimizing/bounds_check_elimination.cc中,每次循环开始之前,会插入一段HDeoptimize,
/** Inserts a deoptimization test in a loop preheader. */
void InsertDeoptInLoop(HLoopInformation* loop,
                       HBasicBlock* block,
                       HInstruction* condition,
                       bool is_null_check = false) {
  // Reuse the loop's suspend check: its dex pc (and environment, below) tell
  // the deopt where to resume in the interpreter.
  HInstruction* suspend = loop->GetSuspendCheck();
  // Materialize the condition right before the preheader's terminator, so the
  // HDeoptimize inserted next can consume it.
  block->InsertInstructionBefore(condition, block->GetLastInstruction());
  // Distinguish null-check BCE deopts from bounds-check BCE deopts.
  DeoptimizationKind kind =
      is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE;
  HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize(
      GetGraph()->GetAllocator(), condition, kind, suspend->GetDexPc());
  block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
  if (suspend->HasEnvironment()) {
    // Copy the suspend check's environment, adjusting loop phis to their
    // pre-loop inputs since the deopt sits in the preheader.
    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
        suspend->GetEnvironment(), loop->GetHeader());
  }
}
其中condition为进入HDeoptimize的条件,截取部分代码如下,有三种条件,比如最简单的第一种:如果循环索引的最大值大于等于数组的长度,那么就会进入到deoptimize。第二种条件,是两个变量,循环最大最小值都是变量的情况。
// In code, using unsigned comparisons:
// (1) constants only
//     if (max_upper >= a.length ) deoptimize;
// (2) two symbolic invariants
//     if (min_upper > max_upper) deoptimize;   unless min_c == max_c
//     if (max_upper >= a.length ) deoptimize;
// (3) general case, unit strides (where lower would exceed upper for arithmetic wrap-around)
//     if (min_lower > max_lower) deoptimize;   unless min_c == max_c
//     if (max_lower > max_upper) deoptimize;
//     if (max_upper >= a.length ) deoptimize;
if (base == nullptr) {
  // Constants only.
  // Only the constant upper bound exists; the shared length test at the
  // bottom is the single deopt condition for this case.
  DCHECK_GE(min_c, 0);
  DCHECK(min_lower == nullptr && min_upper == nullptr &&
         max_lower == nullptr && max_upper != nullptr);
} else if (max_lower == nullptr) {
  // Two symbolic invariants.
  if (min_c != max_c) {
    // Distinct constants offsets: also verify min_upper <= max_upper
    // (HAbove is an unsigned > comparison).
    DCHECK(min_lower == nullptr && min_upper != nullptr &&
           max_lower == nullptr && max_upper != nullptr);
    InsertDeoptInLoop(
        loop, block, new (GetGraph()->GetAllocator()) HAbove(min_upper, max_upper));
  } else {
    DCHECK(min_lower == nullptr && min_upper == nullptr &&
           max_lower == nullptr && max_upper != nullptr);
  }
} else {
  // General case, unit strides.
  if (min_c != max_c) {
    // Guard against arithmetic wrap-around on the lower bounds.
    DCHECK(min_lower != nullptr && min_upper != nullptr &&
           max_lower != nullptr && max_upper != nullptr);
    InsertDeoptInLoop(
        loop, block, new (GetGraph()->GetAllocator()) HAbove(min_lower, max_lower));
  } else {
    DCHECK(min_lower == nullptr && min_upper == nullptr &&
           max_lower != nullptr && max_upper != nullptr);
  }
  // Lower bound must not exceed upper bound.
  InsertDeoptInLoop(
      loop, block, new (GetGraph()->GetAllocator()) HAbove(max_lower, max_upper));
}
// Shared by all three cases: deoptimize when max_upper >= a.length
// (HAboveOrEqual is an unsigned >= comparison).
InsertDeoptInLoop(
    loop, block, new (GetGraph()->GetAllocator()) HAboveOrEqual(max_upper, array_length));
HDeoptimize的定义如下,在检查满足上面其中的一个条件时候,降级到解释器
// Deoptimize to interpreter, upon checking a condition.
class HDeoptimize FINAL : public HVariableInputSizeInstruction {
 public:
  // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move
  // across.
  // Single-input form: input 0 is the triggering condition. SideEffects::All()
  // plus CanBeMoved == false pin the instruction in place.
  HDeoptimize(ArenaAllocator* allocator,
              HInstruction* cond,
              DeoptimizationKind kind,
              uint32_t dex_pc)
      : HVariableInputSizeInstruction(
            kDeoptimize,
            SideEffects::All(),
            dex_pc,
            allocator,
            /* number_of_inputs */ 1,
            kArenaAllocMisc) {
    SetPackedFlag<kFieldCanBeMoved>(false);
    SetPackedField<DeoptimizeKindField>(kind);
    SetRawInputAt(0, cond);
  }

  bool IsClonable() const OVERRIDE { return true; }

  // Use this constructor when the `HDeoptimize` guards an instruction, and any user
  // that relies on the deoptimization to pass should have its input be the `HDeoptimize`
  // instead of `guard`.
  // We set CanTriggerGC to prevent any intermediate address to be live
  // at the point of the `HDeoptimize`.
  HDeoptimize(ArenaAllocator* allocator,
              HInstruction* cond,
              HInstruction* guard,
              DeoptimizationKind kind,
              uint32_t dex_pc)
      : HVariableInputSizeInstruction(
            kDeoptimize,
            SideEffects::CanTriggerGC(),
            dex_pc,
            allocator,
            /* number_of_inputs */ 2,
            kArenaAllocMisc) {
    // Guard form is movable (see CanBeMoved()); input 1 is the guarded value.
    SetPackedFlag<kFieldCanBeMoved>(true);
    SetPackedField<DeoptimizeKindField>(kind);
    SetRawInputAt(0, cond);
    SetRawInputAt(1, guard);
  }

  // Movable only in the guard form; flag set by the constructors above.
  bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); }

  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
    return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind());
  }

  // An environment is always required: it describes the interpreter state to
  // rebuild when deoptimization fires.
  bool NeedsEnvironment() const OVERRIDE { return true; }

  bool CanThrow() const OVERRIDE { return true; }

  DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); }

  // In the guard form this node stands in for the guarded value, so it
  // reports that value's type; otherwise it produces nothing (void).
  DataType::Type GetType() const OVERRIDE {
    return GuardsAnInput() ? GuardedInput()->GetType() : DataType::Type::kVoid;
  }

  bool GuardsAnInput() const {
    return InputCount() == 2;
  }

  HInstruction* GuardedInput() const {
    DCHECK(GuardsAnInput());
    return InputAt(1);
  }

  void RemoveGuard() {
    RemoveInputAt(1);
  }

  DECLARE_INSTRUCTION(Deoptimize);

 protected:
  DEFAULT_COPY_CONSTRUCTOR(Deoptimize);

 private:
  // Packed-field layout: one bit for "can be moved", then the kind field.
  static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
  static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
  static constexpr size_t kFieldDeoptimizeKindSize =
      MinimumBitsToStore(static_cast<size_t>(DeoptimizationKind::kLast));
  static constexpr size_t kNumberOfDeoptimizePackedBits =
      kFieldDeoptimizeKind + kFieldDeoptimizeKindSize;
  static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits,
                "Too many packed fields.");
  using DeoptimizeKindField =
      BitField<DeoptimizationKind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
};
HDeoptimize
HDeoptimize是ART HInstruction IR的一种,它将导致执行从机器码模式切换到解释器模式。这其实是一种反优化指令,即从性能好的机器码模式切换到性能差的解释器模式。那么这肯定是虚拟机不得已而为之的操作,不然为什么要从高性能切到低性能呢?
其实,我们在上一段就介绍了,如果出现数组索引越界异常的时候,就会触发HDeoptimize指令。
// Same example as before: array.length == 5 but the loop runs i = 0..10,
// so array[i] throws ArrayIndexOutOfBoundsException at i == 5.
int []array = new int []{
    1, 2, 3, 4, 5
};
for (int i = 0; i < 11; i++) {
    int value = array[i];
}
0000: const/4 v0, #int 5 // #5
0001: new-array v0, v0, [I // type@0e76
0003: fill-array-data v0, 00000012 // +0000000f
0006: const/4 v1, #int 0 // #0
0007: const/16 v2, #int 11 // #b
0009: if-ge v1, v2, 0010 // +0007
000b: aget v2, v0, v1
000d: add-int/lit8 v1, v1, #int 1 // #01
000f: goto 0007 // -0008
0010: return-void
0011: nop // spacer
0012: array-data (14 units)
上面这段代码,0009-000f是循环对应的指令范围,依据上面的HDeoptimize的插入规则,在0007处检查满足条件后,就会进入到HDeoptimize代码中,其中进入的代码,通过下面定义,name = Deoptimize
// Generates the visitor-pattern Accept() for every concrete HInstruction:
// H<name>::Accept simply dispatches to visitor->Visit<name>(this).
#define DEFINE_ACCEPT(name, super) \
void H##name::Accept(HGraphVisitor* visitor) { \
  visitor->Visit##name(this); \
}
// Expands DEFINE_ACCEPT once per concrete instruction (Deoptimize included).
FOR_EACH_CONCRETE_INSTRUCTION(DEFINE_ACCEPT)
#undef DEFINE_ACCEPT
=== >
void HDeopimize::Accept(HGraphVisitor* visitor) {
visitor->VisitDeoptimize(this);
}
也就是要进入到VisitDeoptimize方法中,这个方法在不同的架构中,有不同的实现,我们以arm64为例,看下这块是怎么实现从机器码跳转到字节码的。
在art/compiler/optimizing/code_generator_arm64.cc
// Code generation for HDeoptimize on arm64: allocate a slow path and emit a
// test on the condition (input 0) that branches to it when the condition holds.
void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
  SlowPathCodeARM64* slow_path =
      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
  GenerateTestAndBranch(deoptimize,
                        /* condition_input_index */ 0,
                        slow_path->GetEntryLabel(),
                        /* false_target */ nullptr);  // Fall through when the condition is false.
}
这段代码,会创建一个DeoptimizationSlowPathARM64的对象,同时会通过GenerateTestAndBranch生成一些机器码。来看
DeoptimizationSlowPathARM64代码
// Out-of-line code executed when an HDeoptimize condition fires on arm64:
// saves live registers and calls the kQuickDeoptimize runtime entrypoint.
class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());  // Target of the branch emitted by VisitDeoptimize.
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    // First runtime argument: the DeoptimizationKind, as a uint32_t.
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    // Tail into the runtime; GetDexPc() identifies where to resume interpreting.
    arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};
这段代码将会执行kQuickDeoptimize的机器码,GetDexPc函数返回的是解释执行的指令位置,是前面代码的0007位置。
kQuickDeoptimize的定义,我们看下,
// Define an enum for the entrypoints. Names are prepended a 'kQuick'.
enum QuickEntrypointEnum { // NOLINT(whitespace/braces)
#define ENTRYPOINT_ENUM(name, rettype, ...) kQuick ## name,
#include "quick_entrypoints_list.h"
QUICK_ENTRYPOINT_LIST(ENTRYPOINT_ENUM)
#undef QUICK_ENTRYPOINT_LIST
#undef ENTRYPOINT_ENUM
====>
V(Deoptimize, void, DeoptimizationKind)
};
====> kQuickDeoptimize指向的是pDeoptimize
#define ENTRYPOINT_ENUM(name, rettype, ...) case kQuick ## name : \
return QUICK_ENTRYPOINT_OFFSET(pointer_size, p ## name);
====>
// Deoptimize真正实现
qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
这段代码,真正跳转的地址是
runtime/arch/arm64/quick_entrypoints_arm64.S
    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
.extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME            // Build a save-everything frame so every register can be reconstructed.
    mov x1, xSELF                          // Pass thread.
    bl artDeoptimizeFromCompiledCode       // (DeoptimizationKind, Thread*); x0 already holds the kind.
    brk 0                                  // Not reached: the callee long-jumps away.
END art_quick_deoptimize_from_compiled_code
可以看到,它是跳转到artDeoptimizeFromCompiledCode去执行的
art/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
// This is called directly from compiled code by an HDeoptimize.
// Pushes the deoptimization context on the thread, then delegates to
// artDeoptimizeImpl with single_frame = true. Never returns.
extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  ScopedQuickEntrypointChecks sqec(self);
  // Before deoptimizing to interpreter, we must push the deoptimization context.
  JValue return_value;
  return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
  self->PushDeoptimizationContext(return_value,
                                  false /* is_reference */,
                                  self->GetException(),
                                  true /* from_code */,
                                  DeoptimizationMethodType::kDefault);
  // Single-frame deopt: only the innermost compiled frame is deoptimized.
  artDeoptimizeImpl(self, kind, true);
}
它会去执行artDeoptimizeImpl函数
// Common deoptimization driver: builds the interpreter (shadow) frames via
// QuickExceptionHandler, then long-jumps out of compiled code. Never returns.
// `single_frame` selects DeoptimizeSingleFrame vs. full-stack DeoptimizeStack.
NO_RETURN static void artDeoptimizeImpl(Thread* self, DeoptimizationKind kind, bool single_frame)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  Runtime::Current()->IncrementDeoptimizationCount(kind);
  if (VLOG_IS_ON(deopt)) {
    if (single_frame) {
      // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
      // specialized visitor that will show whether a method is Quick or Shadow.
    } else {
      LOG(INFO) << "Deopting:";
      self->Dump(LOG_STREAM(INFO));
    }
  }
  self->AssertHasDeoptimizationContext();
  QuickExceptionHandler exception_handler(self, true);  // true: this is a deoptimization.
  {
    ScopedTrace trace(std::string("Deoptimization ") + GetDeoptimizationKindName(kind));
    if (single_frame) {
      exception_handler.DeoptimizeSingleFrame(kind);
    } else {
      exception_handler.DeoptimizeStack();
    }
  }
  uintptr_t return_pc = exception_handler.UpdateInstrumentationStack();
  if (exception_handler.IsFullFragmentDone()) {
    exception_handler.DoLongJump(true);
  } else {
    // Partial fragment: patch up the return pc first, then jump without
    // smashing caller-saves (see comment below).
    exception_handler.DeoptimizePartialFragmentFixup(return_pc);
    // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
    // be caller-saved. This has the downside that we cannot track incorrect register usage down the
    // line.
    exception_handler.DoLongJump(false);
  }
}
artDeoptimizeImpl函数中,最重要的三个方法是:QuickExceptionHandler的构造方法、DeoptimizeSingleFrame和DoLongJump
其中,构造方法,
// Handler used for both exception delivery and deoptimization; here
// is_deoptimization == true. Fields below are filled in during the stack walk.
QuickExceptionHandler::QuickExceptionHandler(Thread* self, bool is_deoptimization)
    : self_(self),
      context_(self->GetLongJumpContext()),  // Register context used for the final long jump.
      is_deoptimization_(is_deoptimization),
      method_tracing_active_(is_deoptimization ||
                             Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()),
      handler_quick_frame_(nullptr),     // SP to restore on the jump (frame for interpreted execution).
      handler_quick_frame_pc_(0),        // PC to jump to (handler, or the quick-to-interpreter bridge).
      handler_method_header_(nullptr),
      handler_quick_arg0_(0),
      handler_method_(nullptr),
      handler_dex_pc_(0),
      clear_exception_(false),
      handler_frame_depth_(kInvalidFrameDepth),
      full_fragment_done_(false) {}
// Deoptimizes only the innermost compiled frame: walks the stack to build its
// shadow frame, invalidates (JIT) or redirects (AOT) the method's compiled
// code, and prepares the long-jump target.
void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) {
  DCHECK(is_deoptimization_);
  // single_frame == true: the visitor stops at the first non-inlined frame.
  DeoptimizeStackVisitor visitor(self_, context_, this, true);
  visitor.WalkStack(true);

  // Compiled code made an explicit deoptimization.
  ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
  DCHECK(deopt_method != nullptr);
  if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
    LOG(INFO) << "Single-frame deopting: "
              << deopt_method->PrettyMethod()
              << " due to "
              << GetDeoptimizationKindName(kind);
    DumpFramesWithType(self_, /* details */ true);
  }
  if (Runtime::Current()->UseJitCompilation()) {
    // JIT: throw away the compiled code so the method is re-interpreted/re-jitted.
    Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
        deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
  } else {
    // Transfer the code to interpreter.
    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
        deopt_method, GetQuickToInterpreterBridge());
  }

  // Sets handler_quick_frame_pc_ so DoLongJump() lands in the interpreter bridge.
  PrepareForLongJumpToInvokeStubOrInterpreterBridge();
}
handler_quick_frame_pc_里保存的就是kQuickQuickToInterpreterBridge的地址。
// Chooses where the long jump should land: either back to the invoke stub
// (signalled via the special deoptimization exception) or directly to the
// quick-to-interpreter bridge entrypoint.
void QuickExceptionHandler::PrepareForLongJumpToInvokeStubOrInterpreterBridge() {
  if (full_fragment_done_) {
    // Restore deoptimization exception. When returning from the invoke stub,
    // ArtMethod::Invoke() will see the special exception to know deoptimization
    // is needed.
    self_->SetException(Thread::GetDeoptimizationException());
  } else {
    // PC needs to be of the quick-to-interpreter bridge.
    int32_t offset;
    // Read the bridge's address out of the thread-local entrypoint table.
    offset = GetThreadOffset<kRuntimePointerSize>(kQuickQuickToInterpreterBridge).Int32Value();
    handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
        reinterpret_cast<uint8_t*>(self_) + offset);
  }
}
我们再看下DeoptimizeSingleFrame的实现,里面通过一个DeoptimizeStackVisitor去WalkStack,最后去准备解释器执行的code地址。
其中DeoptimizeStackVisitor去WalkStack方法,最终会执行VisitFrame方法,会去通过
CreateDeoptimizedFrame创建反优化的frame。
// Called once per quick frame during the stack walk. For each frame that must
// be deoptimized, builds a ShadowFrame (interpreter frame), fills its vregs
// from the optimized frame, and links it to the previously built one.
// Returns true to continue walking, false to stop.
bool VisitFrame() OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
  exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
  ArtMethod* method = GetMethod();
  if (method == nullptr || single_frame_done_) {
    FinishStackWalk();
    return false;  // End stack walk.
  } else if (method->IsRuntimeMethod()) {
    // Ignore callee save method.
    DCHECK(method->IsCalleeSaveMethod());
    return true;
  } else if (method->IsNative()) {
    // If we return from JNI with a pending exception and want to deoptimize, we need to skip
    // the native method.
    // The top method is a runtime method, the native method comes next.
    CHECK_EQ(GetFrameDepth(), 1U);
    callee_method_ = method;
    return true;
  } else if (!single_frame_deopt_ &&
             !Runtime::Current()->IsAsyncDeoptimizeable(GetCurrentQuickFramePc())) {
    // We hit some code that's not deoptimizeable. However, Single-frame deoptimization triggered
    // from compiled code is always allowed since HDeoptimize always saves the full environment.
    LOG(WARNING) << "Got request to deoptimize un-deoptimizable method "
                 << method->PrettyMethod();
    FinishStackWalk();
    return false;  // End stack walk.
  } else {
    // Check if a shadow frame already exists for debugger's set-local-value purpose.
    const size_t frame_id = GetFrameId();
    ShadowFrame* new_frame = GetThread()->FindDebuggerShadowFrame(frame_id);
    const bool* updated_vregs;
    CodeItemDataAccessor accessor(method->DexInstructionData());
    const size_t num_regs = accessor.RegistersSize();
    if (new_frame == nullptr) {
      // No debugger frame exists: create a fresh deoptimized frame at the current dex pc.
      new_frame = ShadowFrame::CreateDeoptimizedFrame(num_regs, nullptr, method, GetDexPc());
      updated_vregs = nullptr;
    } else {
      updated_vregs = GetThread()->GetUpdatedVRegFlags(frame_id);
      DCHECK(updated_vregs != nullptr);
    }
    // Populate the shadow frame's vregs from the optimized (compiled) frame.
    HandleOptimizingDeoptimization(method, new_frame, updated_vregs);
    if (updated_vregs != nullptr) {
      // Calling Thread::RemoveDebuggerShadowFrameMapping will also delete the updated_vregs
      // array so this must come after we processed the frame.
      GetThread()->RemoveDebuggerShadowFrameMapping(frame_id);
      DCHECK(GetThread()->FindDebuggerShadowFrame(frame_id) == nullptr);
    }
    if (prev_shadow_frame_ != nullptr) {
      // Chain this frame under the one built on the previous visit (caller side).
      prev_shadow_frame_->SetLink(new_frame);
    } else {
      // Will be popped after the long jump after DeoptimizeStack(),
      // right before interpreter::EnterInterpreterFromDeoptimize().
      stacked_shadow_frame_pushed_ = true;
      GetThread()->PushStackedShadowFrame(
          new_frame, StackedShadowFrameType::kDeoptimizationShadowFrame);
    }
    prev_shadow_frame_ = new_frame;

    if (single_frame_deopt_ && !IsInInlinedFrame()) {
      // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
      single_frame_done_ = true;
      single_frame_deopt_method_ = method;
      single_frame_deopt_quick_method_header_ = GetCurrentOatQuickMethodHeader();
    }
    callee_method_ = method;
    return true;
  }
}
最后一步,执行DoLongJump操作,跳转到对应的目标位置上,进行代码处理。
context_->SetPC(handler_quick_frame_pc_);
这里的handler_quick_frame_pc_就是kQuickQuickToInterpreterBridge的地址。
// Performs the actual transfer of control: loads the prepared SP/PC/arg0 into
// the register context and long-jumps there. Never returns.
void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) {
  // Place context back on thread so it will be available when we continue.
  self_->ReleaseLongJumpContext(context_);
  context_->SetSP(reinterpret_cast<uintptr_t>(handler_quick_frame_));
  CHECK_NE(handler_quick_frame_pc_, 0u);
  context_->SetPC(handler_quick_frame_pc_);  // e.g. the quick-to-interpreter bridge.
  context_->SetArg0(handler_quick_arg0_);
  if (smash_caller_saves) {
    // Deliberately clobber caller-saved registers to catch bad register usage.
    context_->SmashCallerSaves();
  }
  context_->DoLongJump();
  UNREACHABLE();
}
// arm64 implementation of the long jump: materializes the saved register
// values into local arrays and hands them to the assembly trampoline.
void Arm64Context::DoLongJump() {
  uint64_t gprs[arraysize(gprs_)];
  uint64_t fprs[kNumberOfDRegisters];

  // The long jump routine called below expects to find the value for SP at index 31.
  DCHECK_EQ(SP, 31);

  // Unset registers get a recognizable "bad" sentinel value (kBadGprBase/kBadFprBase + i).
  for (size_t i = 0; i < arraysize(gprs_); ++i) {
    gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i;
  }
  for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : Arm64Context::kBadFprBase + i;
  }
  // Ensure the Thread Register contains the address of the current thread.
  DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]);
  // The Marking Register will be updated by art_quick_do_long_jump.
  art_quick_do_long_jump(gprs, fprs);
}
// art_quick_do_long_jump(uint64_t* gprs /* x0 */, uint64_t* fprs /* x1 */):
// restores every FPR and GPR from the two arrays (SP at index 31, PC at
// index 33 per the caller), then branches to the restored PC. Never returns.
ENTRY art_quick_do_long_jump
    // Load FPRs: d0..d31 pairwise from the fprs array, x1 advancing by 16.
    ldp d0, d1, [x1], #16
    ldp d2, d3, [x1], #16
    ldp d4, d5, [x1], #16
    ldp d6, d7, [x1], #16
    ldp d8, d9, [x1], #16
    ldp d10, d11, [x1], #16
    ldp d12, d13, [x1], #16
    ldp d14, d15, [x1], #16
    ldp d16, d17, [x1], #16
    ldp d18, d19, [x1], #16
    ldp d20, d21, [x1], #16
    ldp d22, d23, [x1], #16
    ldp d24, d25, [x1], #16
    ldp d26, d27, [x1], #16
    ldp d28, d29, [x1], #16
    ldp d30, d31, [x1]
    // Load GPRs, walking the gprs array backwards from the top.
    // TODO: lots of those are smashed, could optimize.
    add x0, x0, #30*8
    ldp x30, x1, [x0], #-16        // LR & SP
    ldp x28, x29, [x0], #-16
    ldp x26, x27, [x0], #-16
    ldp x24, x25, [x0], #-16
    ldp x22, x23, [x0], #-16
    ldp x20, x21, [x0], #-16
    ldp x18, x19, [x0], #-16       // X18 & xSELF
    ldp x16, x17, [x0], #-16
    ldp x14, x15, [x0], #-16
    ldp x12, x13, [x0], #-16
    ldp x10, x11, [x0], #-16
    ldp x8, x9, [x0], #-16
    ldp x6, x7, [x0], #-16
    ldp x4, x5, [x0], #-16
    ldp x2, x3, [x0], #-16
    mov sp, x1                     // Install the restored SP.
    REFRESH_MARKING_REGISTER
    // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
    ldr x1, [x0, #33*8]
    // And the value of x0.
    ldr x0, [x0]
    br x1                          // Jump to the restored PC.
END art_quick_do_long_jump
// Bridge from quick (compiled) calling convention into the interpreter:
// forwards the method, current thread and SP to artQuickToInterpreterBridge.
ENTRY art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.
    // x0 will contain mirror::ArtMethod* method.
    mov x1, xSELF                          // How to get Thread::Current() ???
    mov x2, sp
    // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
    //                                      mirror::ArtMethod** sp)
    bl artQuickToInterpreterBridge
    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
    REFRESH_MARKING_REGISTER
    fmov d0, x0                            // Mirror the 64-bit result into d0 (presumably for FP returns — confirm against AOSP).
    RETURN_OR_DELIVER_PENDING_EXCEPTION    // Return, or deliver a pending exception if one is set.
END art_quick_to_interpreter_bridge
最后执行art_quick_do_long_jump,跳转到art_quick_to_interpreter_bridge,最后执行
art/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc中的artQuickToInterpreterBridge方法,完成跳转。
今天的文章art中反优化HDeoptimize分享到此就结束了,感谢您的阅读。
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
如需转载请保留出处:https://bianchenghao.cn/60968.html