Wolf Stand V1
實驗目標:
1.進入站立瞬間後,由於其實可能仍處於不穩定狀態,要再進入靜立狀態
2.進入站立瞬間後,可能面向並沒有瞄準目標,要轉向目標
3.使用Clamp Reward避免快速自殺
實驗設計:
1.任何弱點觸地皆失敗 (尾巴和武器並非弱點)
2.非弱點肢體觸地時,依該肢體的 damageCoef 施加小幅負向 clampReward:
if(wolfBodies[i].damageCoef > 0f){clampReward += -0.01f * wolfBodies[i].damageCoef;}
3.
//=== Stand-brain outcome handling (fragment of a per-step agent update) ===
// Branch order: fatal weak-point ground contact -> fell below the stage ->
// target out of range -> normal per-step reward shaping.
//Set: judge.endEpisode = true
//Set: judge.episodeLength = 10f
//Set: tail not weakness
//Set: useClampReward = true
// A weak body part touched the ground -- failure state for the Stand task.
// (presumably set by collision callbacks elsewhere; tail/weapon are excluded
// per the notes above -- TODO confirm against the collision handler.)
if(weaknessOnGround)
{
if(inferenceMode)
{
// During inference, hand control to the pre-trained get-up brain
// instead of terminating the episode.
brainMode = BrainMode.GetUp;
SetModel("WolfGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
//===Train Stand===
// Terminal penalty for a weak point touching the ground, then restart.
// (return skips the reward-shaping branch below.)
AddReward(-1f);
judge.outLife++;
judge.Reset();
return;
//===Train Other===
// brainMode = BrainMode.GetUp;
// SetModel("WolfGetUp", getUpBrain);
// behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
}
// Root fell below the stage (local Y < -10): same handling as a fall,
// but tallied separately as judge.outY.
else if(wolfRoot.localPosition.y < -10f)
{
if(inferenceMode)
{
brainMode = BrainMode.GetUp;
SetModel("WolfGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
//===Train Stand===
// Terminal penalty for leaving the stage vertically, then restart.
AddReward(-1f);
judge.outY++;
judge.Reset();
return;
//===Train Other===
// brainMode = BrainMode.GetUp;
// SetModel("WolfGetUp", getUpBrain);
// behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
}
// Target unreachably far: reset without reward or penalty.
else if(targetDistance > 500f)
{
judge.Reset();
}
else
{
// --- Normal per-step reward shaping for standing still while facing the target ---
// Smoothed target position in stage-local space (buffer damps target jitter).
targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);
rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);
// Horizontal (XZ-plane) direction from root to target.
flatTargetVelocity = rootDir;
flatTargetVelocity.y = 0f;
// NOTE(review): targetDistance is refreshed here, so the "> 500f" check above
// used the value from the previous step.
targetDistance = flatTargetVelocity.magnitude;
// Perpendicular (left-hand) horizontal direction, used for look-alignment terms.
Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
// All *Angle values are InverseLerp-normalized into [0,1]; 1 = best alignment.
// Note the asymmetric ranges: spineUpAngle saturates at 30 degrees, rootUpAngle at 20.
lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.up, headDir));
upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, Vector3.up));
//Lean
// Desired lean direction: horizontal target direction rotated by the root aim rotation.
Vector3 leanDir = rootAimRot * flatTargetVelocity;
spineLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));
spineUpAngle = Mathf.InverseLerp(180f, 30f, Vector3.Angle(wolfSpine.right * -1f, leanDir));
rootLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right * -1f, flatLeftDir));
rootUpAngle = Mathf.InverseLerp(180f, 20f, Vector3.Angle(wolfRoot.up, leanDir));
// Lower velocity / angular velocity is rewarded via the (1f - x) terms below --
// the agent should hold still. (Helpers defined elsewhere; presumably map
// speed into [0,1] with the given scale -- TODO confirm.)
float velocityReward = GetVelocityReward(8f);
float angularReward = GetAngularVelocityReward(10f);
// 0.25 per planted limb; 1.0 means all four feet are grounded.
float standReward = (wolfLeftFeetBody.isStand? 0.25f : 0f) + (wolfRightFeetBody.isStand? 0.25f : 0f) + (wolfLeftHandBody.isStand? 0.25f : 0f) + (wolfRightHandBody.isStand? 0.25f : 0f);
//===Train Stand===
// Shaped step reward: stillness + facing/posture alignment + feet contact + low exertion.
lastReward = (1f-velocityReward) * 0.015f + (1f-angularReward) * 0.015f
+ (lookAngle + upAngle + spineLookAngle + spineUpAngle + rootLookAngle + rootUpAngle) * 0.008f + standReward * 0.01f
+ (1f - exertionRatio) * 0.002f;
// "Stable stand" detector: well aligned, low motion, all feet down.
// (velocityReward/angularReward are raw, so LOW values mean standing still.)
if(lookAngle > 0.9f && upAngle > 0.9f && spineLookAngle > 0.9f && rootLookAngle > 0.9f && velocityReward < 0.3f && angularReward < 0.5f && standReward > 0.9f)
{
//===Train Stand===
// Debug.Log("Stand");
// Flat bonus for achieving a stable stand this step.
totalReward += 0.01f;
AddReward( 0.01f );
//===Enter Run===
// ConfirmArrived();
// brainMode = BrainMode.Run;
// SetModel("WolfTrain", runBrain);
// behaviorParameters.BehaviorType = BehaviorType.Default;
// landingMoment = Time.fixedTime;
}
//===Train Stand===
// Clamp trick (experiment goal 3): fold in the accumulated negative clampReward
// but never let the step total go below zero, so the agent cannot profit
// from ending the episode quickly ("fast suicide").
if(useClampReward)
{
lastReward = lastReward+clampReward;
if(lastReward < 0f) lastReward = 0f;
}
totalReward += lastReward;
AddReward( lastReward );
}
//大致來說
1.鼓勵面向
2.鼓勵抑制速度和角速度
3.鼓勵雙腳觸地
4.鼓勵抑制出力
5.使用ClampReward
實驗時間:
Step: 5e7
Time Elapsed: 84510s (23.48hr)
實驗結果:
實驗結果為失敗,精確來說是像災難一樣
原本也是在同樣缺少Force Sharpening的情況下進行,因為想要這樣訓練的數據
但沒想到會偏離成這種程度
雖然有喬好面向,但超級不安定,而且喬好了也會自己晃走
果然應該注意狼的後腿
原本是按照現實的犬科後腿側翼約可以展開到90度來設計
但這看起來讓狼真的變成了奇怪的生物,不過數據還是不足以下結論
發現超級大問題,狼的關節和人型不同,而我又忘記根據狼的關節修改引導向量
因此不管是受身還是靜立時會亂晃動,其主因一定包含這一點——因為關節引導角度的概念根本就不對
因此下個實驗將進行狼受身
1.修正關節引導向量
2.調整DamageCoef
3.將前後腿側翼展開範圍都減少