if(koboldBodies[i].damageCoef > 0f)
{
    // AddReward(-0.01f * koboldBodies[i].damageCoef);
    clampReward += -0.01f * koboldBodies[i].damageCoef;
}
3.
//Set: judge.endEpisode = true
//Set: judge.episodeLength = 10f
//Set: weapon, tail not weakness
//Set: useClampReward = true
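The judge object configured above is the author's own bookkeeping helper; its implementation is not shown in this post. Read literally, endEpisode = true and episodeLength = 10f suggest "end the ML-Agents episode on reset" and "time out after 10 seconds". Below is a purely hypothetical sketch of that reading: the field names come from the //Set: lines and from the counters used later (outLife, outY); everything else is invented for illustration.

using UnityEngine;
using Unity.MLAgents;

// Hypothetical sketch of the Judge helper referenced above; the real class is the
// author's own and is not shown here. Only the field names are taken from the notes,
// the behavior is assumed.
public class Judge
{
    public bool endEpisode = true;      // end the ML-Agents episode whenever Reset() is called
    public float episodeLength = 10f;   // seconds before an episode is considered finished
    public int outLife;                 // episodes ended because a weakness touched the ground
    public int outY;                    // episodes ended because the agent fell off the stage

    Agent agent;
    float episodeStart;

    public Judge(Agent agent) { this.agent = agent; episodeStart = Time.fixedTime; }

    // True once the configured episode length has elapsed.
    public bool TimedOut => Time.fixedTime - episodeStart > episodeLength;

    public void Reset()
    {
        episodeStart = Time.fixedTime;
        if (endEpisode) agent.EndEpisode();  // assumed: restart the ML-Agents episode
    }
}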
if(weaknessOnGround)
{
    if(inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("KoboldGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        //===Train Stand===
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
        //===Train Other===
        // brainMode = BrainMode.GetUp;
        // SetModel("KoboldGetUp", getUpBrain);
        // behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
}
else if(koboldRoot.localPosition.y < -10f)
{
    if(inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("KoboldGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        //===Train Stand===
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
        //===Train Other===
        // brainMode = BrainMode.GetUp;
        // SetModel("KoboldGetUp", getUpBrain);
        // behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
}
else if(targetDistance > 500f)
{
    judge.Reset();
}
else
{
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldRootRb.position);
    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;

    Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.up, headDir));
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.right * -1f, Vector3.up));
    //Lean
    Vector3 leanDir = rootAimRot * flatTargetVelocity;
    spineLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldSpine.forward, flatLeftDir));
    spineUpAngle = Mathf.InverseLerp(180f, 30f, Vector3.Angle(koboldSpine.right * -1f, leanDir));
    rootLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRoot.right * -1f, flatLeftDir));
    rootUpAngle = Mathf.InverseLerp(180f, 20f, Vector3.Angle(koboldRoot.up, leanDir));

    float velocityReward = GetVelocityReward(8f);
    float angularReward = GetAngularVelocityReward(10f);
    float standReward = (koboldLeftFeetBody.isStand ? 0.5f : 0f) + (koboldRightFeetBody.isStand ? 0.5f : 0f);

    //===Train Stand===
    lastReward = (1f-velocityReward) * 0.015f + (1f-angularReward) * 0.015f
        + (lookAngle + upAngle + spineLookAngle + spineUpAngle + rootLookAngle + rootUpAngle) * 0.008f + standReward * 0.01f
        + (1f - exertionRatio) * 0.002f;

    if(lookAngle > 0.9f && upAngle > 0.9f && spineLookAngle > 0.9f && rootLookAngle > 0.9f && velocityReward < 0.3f && angularReward < 0.5f && standReward > 0.9f)
    {
        //===Train Stand===
        // Debug.Log("Stand");
        lastReward += 0.01f;
        //===Enter Run===
        // ConfirmArrived();
        // brainMode = BrainMode.Run;
        // SetModel("KoboldSentinelTrain", runBrain);
        // behaviorParameters.BehaviorType = BehaviorType.Default;
        // landingMoment = Time.fixedTime;
    }

    //===Train Stand===
    if(useClampReward)
    {
        lastReward += clampReward;
        if(lastReward < 0f) lastReward = 0f;
    }
    totalReward += lastReward;
    AddReward(lastReward);
}
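GetVelocityReward(8f) and GetAngularVelocityReward(10f) above are the author's own helpers and are not shown in this snippet. Since the shaping term rewards (1f - velocityReward), a plausible reading is that they map the root rigidbody's linear and angular speed into [0,1] against the passed-in maximum. A hypothetical sketch under that assumption; these would sit in the same agent class as the code above:

// Hypothetical sketch only: the real GetVelocityReward / GetAngularVelocityReward are
// not shown in this post. Assumption: they normalize the root rigidbody's speed into
// [0,1] against the given maximum, so 0 means perfectly still and 1 means at or above it.
float GetVelocityReward(float maxSpeed)
{
    return Mathf.InverseLerp(0f, maxSpeed, koboldRootRb.velocity.magnitude);
}

float GetAngularVelocityReward(float maxAngularSpeed)
{
    return Mathf.InverseLerp(0f, maxAngularSpeed, koboldRootRb.angularVelocity.magnitude);
}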
//Roughly speaking, this reward:
1. Encourages facing the target
2. Encourages suppressing velocity and angular velocity
3. Encourages keeping both feet on the ground
4. Encourages suppressing exertion
5. Uses the ClampReward mechanism (see the sketch after this list)
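For reference, the ClampReward idea as it appears in the code above: per-step penalties (such as the damage penalty at the top of this snippet) are accumulated into clampReward instead of being passed to AddReward directly, and the combined step reward is floored at zero, so penalties can cancel the positive shaping terms but never push a single step's reward negative. A minimal sketch of that reading; the original does this inline in the agent, so the helper class below is illustrative only:

// Minimal sketch of the ClampReward mechanism as read from the code above
// (illustrative restructuring, not the author's actual class).
public class ClampRewardSketch
{
    float clampReward;   // accumulated (negative) penalties for this step
    float lastReward;    // accumulated positive shaping terms for this step
    public bool useClampReward = true;

    public void AddPenalty(float penalty) { clampReward += penalty; }  // e.g. -0.01f * damageCoef
    public void AddShaping(float shaping) { lastReward += shaping; }

    // Value that would be passed to AddReward(): penalties eat into the
    // shaping reward, but the per-step reward never drops below zero.
    public float FinalizeStepReward()
    {
        float r = lastReward;
        if (useClampReward)
        {
            r += clampReward;
            if (r < 0f) r = 0f;
        }
        clampReward = 0f;
        lastReward = 0f;
        return r;
    }
}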
Experiment duration:
Step: 5e7
Time Elapsed: 49801s (13.83hr)
Experiment results:
The experiment was a failure.
The kobold showed no clear facing/aiming behavior, and it mostly tended to lean backward and prop itself up with its tail.
The biggest failure is that the plan was to reuse the findings from the 道爾 Standing 3 study,
but it turns out that when adapting it to the Kobold Sentinel, the Force Sharping guidance was accidentally deleted.
In other words, through carelessness, this run was effectively equivalent to 道爾 Standing 2, and it reproduced the same poor result.
That said, the 道爾 Standing 3 study also noted some remaining issues,
which are planned to be addressed together in the next experiment.
The next experiment will therefore be Kobold Sentinel Standing:
1. Reward the aiming direction, using Force Sharping to sharpen how quickly the facing is adjusted (see the sketch after this list)
2. Reward suppressing whole-body velocity and angular velocity
3. Previously the up vectors of the Spine and Root were guided to stay within a specified angle of Vector3.up, but this effectively encouraged 道爾 to lean backward;
   the up-vector target will be changed to a forward-leaning angle, again using Force Sharping to sharpen convergence
4. Add the ClampReward mechanism
5. Tune the Force Sharping values based on 道爾 Standing 3
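Force Sharping is the author's own guidance technique from the 道爾 Standing 3 write-up and is not defined in this post. One common way to "sharpen" an InverseLerp-style alignment score, and only a guess at what is meant here, is to raise the [0,1] score to a power so that most of the reward is earned only near perfect alignment. The sketch below shows that idea purely as an assumption, not the author's actual Force Sharping:

// Hypothetical illustration only: 'Force Sharping' is the author's own technique and is not
// defined in this post. Assumption: an alignment score in [0,1] is raised to a power so the
// reward concentrates near perfect alignment, which steepens the gradient toward the target pose.
float Sharpen(float alignment01, float sharpness)
{
    return Mathf.Pow(Mathf.Clamp01(alignment01), sharpness);
}

// Example use (sharpness value is arbitrary):
// float sharpLook = Sharpen(lookAngle, 4f);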
One option here is whether to change the standing experiment into "quickly adjusting facing",
without worrying about whether the standing pose can then be held.
For now the answer is no: as a bridge into running, quickly adjusting facing would be better,
but a stand that can be held for a long time has many uses, so for now the answer stays no. I will keep thinking it over until the Kobold Sentinel Standing setup is formally revised tomorrow.