Wolf Stand V1
實驗目標:
1.進入站立瞬間後,由於其實可能仍處於不穩定狀態,要再進入靜立狀態
2.進入站立瞬間後,可能面向並沒有瞄準目標,要轉向目標
3.使用Clamp Reward避免快速自殺
實驗設計:
1.任何弱點觸地皆失敗 (尾巴和武器並非弱點)
2.非弱點肢體觸地時,依該肢體的 damageCoef 施加小幅負向 clampReward:
if(wolfBodies[i].damageCoef > 0f){clampReward += -0.01f * wolfBodies[i].damageCoef;}
3.
//=== Stand-brain outcome handling (fragment of a per-step agent update) ===
// Branch order: fatal weak-point ground contact -> fell below the stage ->
// target out of range -> normal per-step reward shaping.
//Set: judge.endEpisode = true
//Set: judge.episodeLength = 10f
//Set: tail not weakness
//Set: useClampReward = true
// A weak body part touched the ground -- failure state for the Stand task.
// (presumably set by collision callbacks elsewhere; tail/weapon are excluded
// per the notes above -- TODO confirm against the collision handler.)
if(weaknessOnGround)
{
if(inferenceMode)
{
// During inference, hand control to the pre-trained get-up brain
// instead of terminating the episode.
brainMode = BrainMode.GetUp;
SetModel("WolfGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
//===Train Stand===
// Terminal penalty for a weak point touching the ground, then restart.
// (return skips the reward-shaping branch below.)
AddReward(-1f);
judge.outLife++;
judge.Reset();
return;
//===Train Other===
// brainMode = BrainMode.GetUp;
// SetModel("WolfGetUp", getUpBrain);
// behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
}
// Root fell below the stage (local Y < -10): same handling as a fall,
// but tallied separately as judge.outY.
else if(wolfRoot.localPosition.y < -10f)
{
if(inferenceMode)
{
brainMode = BrainMode.GetUp;
SetModel("WolfGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
//===Train Stand===
// Terminal penalty for leaving the stage vertically, then restart.
AddReward(-1f);
judge.outY++;
judge.Reset();
return;
//===Train Other===
// brainMode = BrainMode.GetUp;
// SetModel("WolfGetUp", getUpBrain);
// behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
}
// Target unreachably far: reset without reward or penalty.
else if(targetDistance > 500f)
{
judge.Reset();
}
else
{
// --- Normal per-step reward shaping for standing still while facing the target ---
// Smoothed target position in stage-local space (buffer damps target jitter).
targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);
rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);
// Horizontal (XZ-plane) direction from root to target.
flatTargetVelocity = rootDir;
flatTargetVelocity.y = 0f;
// NOTE(review): targetDistance is refreshed here, so the "> 500f" check above
// used the value from the previous step.
targetDistance = flatTargetVelocity.magnitude;
// Perpendicular (left-hand) horizontal direction, used for look-alignment terms.
Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
// All *Angle values are InverseLerp-normalized into [0,1]; 1 = best alignment.
// Note the asymmetric ranges: spineUpAngle saturates at 30 degrees, rootUpAngle at 20.
lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.up, headDir));
upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, Vector3.up));
//Lean
// Desired lean direction: horizontal target direction rotated by the root aim rotation.
Vector3 leanDir = rootAimRot * flatTargetVelocity;
spineLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));
spineUpAngle = Mathf.InverseLerp(180f, 30f, Vector3.Angle(wolfSpine.right * -1f, leanDir));
rootLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right * -1f, flatLeftDir));
rootUpAngle = Mathf.InverseLerp(180f, 20f, Vector3.Angle(wolfRoot.up, leanDir));
// Lower velocity / angular velocity is rewarded via the (1f - x) terms below --
// the agent should hold still. (Helpers defined elsewhere; presumably map
// speed into [0,1] with the given scale -- TODO confirm.)
float velocityReward = GetVelocityReward(8f);
float angularReward = GetAngularVelocityReward(10f);
// 0.25 per planted limb; 1.0 means all four feet are grounded.
float standReward = (wolfLeftFeetBody.isStand? 0.25f : 0f) + (wolfRightFeetBody.isStand? 0.25f : 0f) + (wolfLeftHandBody.isStand? 0.25f : 0f) + (wolfRightHandBody.isStand? 0.25f : 0f);
//===Train Stand===
// Shaped step reward: stillness + facing/posture alignment + feet contact + low exertion.
lastReward = (1f-velocityReward) * 0.015f + (1f-angularReward) * 0.015f
+ (lookAngle + upAngle + spineLookAngle + spineUpAngle + rootLookAngle + rootUpAngle) * 0.008f + standReward * 0.01f
+ (1f - exertionRatio) * 0.002f;
// "Stable stand" detector: well aligned, low motion, all feet down.
// (velocityReward/angularReward are raw, so LOW values mean standing still.)
if(lookAngle > 0.9f && upAngle > 0.9f && spineLookAngle > 0.9f && rootLookAngle > 0.9f && velocityReward < 0.3f && angularReward < 0.5f && standReward > 0.9f)
{
//===Train Stand===
// Debug.Log("Stand");
// Flat bonus for achieving a stable stand this step.
totalReward += 0.01f;
AddReward( 0.01f );
//===Enter Run===
// ConfirmArrived();
// brainMode = BrainMode.Run;
// SetModel("WolfTrain", runBrain);
// behaviorParameters.BehaviorType = BehaviorType.Default;
// landingMoment = Time.fixedTime;
}
//===Train Stand===
// Clamp trick (experiment goal 3): fold in the accumulated negative clampReward
// but never let the step total go below zero, so the agent cannot profit
// from ending the episode quickly ("fast suicide").
if(useClampReward)
{
lastReward = lastReward+clampReward;
if(lastReward < 0f) lastReward = 0f;
}
totalReward += lastReward;
AddReward( lastReward );
}
//大致來說
1.鼓勵面向
2.鼓勵抑制速度和角速度
3.鼓勵雙腳觸地
4.鼓勵抑制出力
5.使用ClampReward
實驗時間:
Step: 5e7
Time Elapsed: 84510s (23.48hr)
實驗結果:
實驗結果為失敗,精確來說是像災難一樣
原本也是在同樣缺少Force Sharpening的情況下進行,因為想要這樣訓練的數據
但沒想到會偏離成這種程度
雖然有喬好面向,但超級不安定,而且喬好了也會自己晃走
果然應該注意狼的後腿
原本是按照現實的犬科後腿側翼約可以展開到90度來設計
但這看起來讓狼真的變成了奇怪的生物,不過數據還是不足以下結論
發現超級大問題,狼的關節和人型不同,而我又忘記根據狼的關節修改引導向量
因此不管是受身還是靜立時會亂晃動,其主因一定包含這一點——因為關節引導角度的概念根本就不對
因此下個實驗將進行狼受身
1.修正關節引導向量
2.調整DamageCoef
3.將前後腿側翼展開範圍都減少