Doyle Chase V5
實驗目標:
1.進入靜立狀態後,進入追逐狀態,在追逐狀態下,要能持續跑至接近目標的距離內
2.動作引導為類似火影跑的開臂奔跑動作
實驗設計:
1.任何弱點觸地皆失敗 (尾巴和劍並非弱點)
2.獎勵計算程式碼:
targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
headDir = targetSmoothPosition - stageBase.InverseTransformPoint(doyleHeadRb.position);
rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(doyleRootRb.position);
flatTargetVelocity = rootDir;
flatTargetVelocity.y = 0f;
targetDistance = flatTargetVelocity.magnitude;
lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleHead.up, headDir));
upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleHead.right * -1f, Vector3.up));
aimVelocity = flatTargetVelocity.normalized;
aimVelocity.y = 0.2f;
//Lean
Vector3 leanDir = aimVelocity;
leanDir.y = 0.1f;
spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleSpine.right * -1f, leanDir));
rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleRoot.up, leanDir));
//Naruto Arm
Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleLeftUpperArm.right, leftUpperArmAimRot * flatTargetVelocity));
leftForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleLeftForeArm.right, leftForeArmAimRot * flatTargetVelocity));
rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleRightUpperArm.right, rightUpperArmAimRot * flatTargetVelocity));
rightForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleRightForeArm.right, rightForeArmAimRot * flatTargetVelocity));
swordAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleSword.up, swordAimRot * flatTargetVelocity));
leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleLeftThigh.forward * -1f, flatLeftDir));
rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleRightThigh.forward * -1f, flatLeftDir));
avgVelocity = velocityBuffer.GetSmoothVal();
velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
flatVelocity = avgVelocity;
flatVelocity.y = 0f;
flatVelocityManitude = flatVelocity.magnitude;
velocityCoef = Mathf.InverseLerp(0f, 15f, Vector3.Project(avgVelocity, aimVelocity).magnitude );
flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);
if(targetDistance > nearModeRange)
{
    if(Time.fixedTime - landingMoment > landingBufferTime)
    {
        bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);
        bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);
        float motionLimit = Mathf.Lerp(0f, 0.7f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);
        float motionLimit2 = Mathf.Lerp(0f, 0.9f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);
        bool outMotion = lookAngle < motionLimit2 || upAngle < motionLimit2 || leftThighAngle < motionLimit2 || rightThighAngle < motionLimit2 || spineUpAngle < motionLimit || rootUpAngle < motionLimit || leftUpperArmAngle < motionLimit || leftForeArmAngle < motionLimit || rightUpperArmAngle < motionLimit || rightForeArmAngle < motionLimit || swordAngle < motionLimit;
        if( outSpeed || outDirection || outMotion)
        {
            AddReward(-1f);
            if(outSpeed){judge.outSpeed++;}
            if(outDirection){judge.outDirection++;}
            if(outMotion){judge.outMotion++;}
            judge.Reset();
            return;
        }
    }
    lastReward = (velocityAngleCoef + velocityCoef) * 0.004f + (lookAngle+upAngle) * 0.002f + (leftThighAngle+rightThighAngle) * 0.001f+ (spineUpAngle+rootUpAngle) * 0.001f + (leftUpperArmAngle+leftForeArmAngle+rightUpperArmAngle+rightForeArmAngle+swordAngle) * 0.001f+ (1f - exertionRatio) * 0.0005f;
    totalReward += lastReward;
    AddReward( lastReward );
}
//大致來說,
--1.獎勵視線
--2.獎勵投影至"跑動推薦向量"的速度和角度,並使用Force Sharping
--3.獎勵Root和Spine前傾,獎勵雙臂方向平行指定向量,並使用Force Sharping
--4.獎勵減少動作變化
3.取消 End episode on maximum of 20s
實驗結果:
實驗結果為失敗
道爾只有一瞬間是指定動作,而且馬上就會跌倒
觀察後推測原因是指定的前傾幅度近乎平行地面,實在太極端了
但由於訓練完成時,得分實際上仍在穩定上升
所以無法斷言是做不到還是訓練不足
下個實驗
1.減少前傾角度
2.鼓勵尾巴根部平行地面
3.訓練數量2倍
另外紀錄
得分係數的比重是關鍵:實際上有一個中途腰斬的實驗,由於調高了動作得分係數的比重,導致道爾優先追求動作得分而毫無移動傾向,最後造成訓練進展完全卡關