ETH官方钱包

前往
大廳
主題

道爾靜立3

夏洛爾 | 2022-11-01 11:20:53 | 巴幣 2 | 人氣 193


Doyle Stand v3
實驗目標:
1.進入站立瞬間後,由於其實可能仍處於不穩定狀態,要再進入靜立狀態
2.進入站立瞬間後,可能面向並沒有瞄準目標,要轉向目標

實驗設計:
1.任何弱點觸地皆失敗 (尾巴和劍並非弱點)
2.
// Per-step reward and termination logic for the standing experiment.
// This is the interior of a method whose header is not shown in this snippet.

// Failure cases: a weak point is on the ground, or the root has dropped below
// local y = -10. The || short-circuits, so the position is only read when no
// weak point is grounded (same evaluation order as the original else-if).
if (weaknessOnGround || doyleRoot.localPosition.y < -10f)
{
    if (inferenceMode)
    {
        // In inference mode the agent is not failed; it is handed over to the
        // get-up model instead.
        brainMode = DoyleMode.GetUp;
        SetModel("DoyleGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        // Training mode: penalize, record which failure occurred, and stop.
        AddReward(-1f);
        if (weaknessOnGround)
        {
            judge.outLife++;
        }
        else
        {
            judge.outY++;
        }
        judge.Reset(); // presumably restarts the trial; defined outside this snippet
        return;
        // Disabled alternative kept from the original (hand over to get-up in training too):
        // brainMode = DoyleMode.GetUp;
        // SetModel("DoyleGetUp", getUpBrain);
        // behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
}
else
{
    // --- Target geometry, expressed in stageBase local space ---
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(doyleHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(doyleRootRb.position);

    // Horizontal (y = 0) direction from the root to the target.
    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;

    Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);

    // --- Orientation scores ---
    // InverseLerp(180, 0, angle) yields 1 when perfectly aligned and 0 when opposite.
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleHead.up, headDir));
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(-doyleHead.right, Vector3.up));

    //Lean
    Vector3 leanDir = rootAimRot * flatTargetVelocity;
    spineLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(doyleSpine.forward, flatLeftDir));
    // The "up" scores saturate at 1 once within 30 deg (spine) / 20 deg (root) of leanDir.
    spineUpAngle = Mathf.InverseLerp(180f, 30f, Vector3.Angle(-doyleSpine.right, leanDir));
    rootLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(-doyleRoot.right, flatLeftDir));
    rootUpAngle = Mathf.InverseLerp(180f, 20f, Vector3.Angle(doyleRoot.up, leanDir));

    // Disabled earlier formulation kept from the original:
    // float velocityReward = Mathf.InverseLerp(0f, 10f, doyleRootRb.velocity.magnitude) * 0.5f + Mathf.InverseLerp(0f, 10f, doyleSpineRb.velocity.magnitude) * 0.3f + Mathf.InverseLerp(0f, 10f, doyleHeadRb.velocity.magnitude) * 0.2f;
    // float angularReward = Mathf.InverseLerp(0f, 6.28f, doyleRootRb.angularVelocity.magnitude) * 0.2f + Mathf.InverseLerp(0f, 6.28f, doyleSpineRb.angularVelocity.magnitude) * 0.3f + Mathf.InverseLerp(0f, 6.28f, doyleHeadRb.angularVelocity.magnitude) * 0.5f;

    // NOTE(review): despite the names, these two values are used as "amount of
    // motion" (rewarded as 1 - value, failed when too high) - confirm against the helpers.
    float velocityReward = GetVelocityReward(8f);
    float angularReward = GetAngularVelocityReward(10f);

    // 0.5 per foot that reports isStand, so 1.0 means both feet.
    float standReward = 0f;
    if (doyleLeftFeetBody.isStand)
    {
        standReward += 0.5f;
    }
    if (doyleRightFeetBody.isStand)
    {
        standReward += 0.5f;
    }

    // --- Dense reward for this step (term order kept as in the original) ---
    lastReward = (1f - velocityReward) * 0.015f
        + (1f - angularReward) * 0.015f
        + (lookAngle + upAngle + spineLookAngle + spineUpAngle + rootLookAngle + rootUpAngle) * 0.008f
        + standReward * 0.01f
        + (1f - exertionRatio) * 0.002f;
    totalReward += lastReward;
    AddReward(lastReward);

    // --- Progressive limits once the landing buffer time has elapsed ---
    if (Time.fixedTime - landingMoment > landingBufferTime)
    {
        // Interpolation parameter for the limits below; Mathf.Lerp clamps it to [0, 1],
        // so the limits reach their final values 3 seconds after the buffer ends.
        float shapingProgress = (Time.fixedTime - landingMoment - landingBufferTime) / 3f;

        float velocityCeiling = Mathf.Lerp(1f, 0.3f, shapingProgress);
        float angularCeiling = Mathf.Lerp(1f, 0.5f, shapingProgress);
        float aimLimit = Mathf.Lerp(0f, 0.7f, shapingProgress);
        float aimLimit2 = Mathf.Lerp(0f, 0.9f, shapingProgress);

        bool outVelocity = velocityReward > velocityCeiling;
        bool outAngularVelocity = angularReward > angularCeiling;
        bool outSpeed = outVelocity || outAngularVelocity;

        bool outDirection = lookAngle < aimLimit2
            || upAngle < aimLimit2
            || spineLookAngle < aimLimit2
            || rootLookAngle < aimLimit2;
        bool outMotion = spineUpAngle < aimLimit || rootUpAngle < aimLimit;

        if (outSpeed || outDirection || outMotion)
        {
            AddReward(-1f);
            // More than one counter may be incremented for the same failure.
            if (outSpeed)
            {
                judge.outSpeed++;
            }
            if (outDirection)
            {
                judge.outDirection++;
            }
            if (outMotion)
            {
                judge.outMotion++;
            }
            judge.Reset();
            return;
        }
    }

    // --- Bonus while every standing criterion is met at once ---
    bool isAimed = lookAngle > 0.9f && upAngle > 0.9f && spineLookAngle > 0.9f && rootLookAngle > 0.9f;
    bool isCalm = velocityReward < 0.3f && angularReward < 0.5f;
    bool isPlanted = standReward > 0.9f;
    if (isAimed && isCalm && isPlanted)
    {
        Debug.Log("Stand");
        totalReward += 0.01f;
        AddReward(0.01f);
    }
}
3.
// Scan every body part for ground contact.
// A grounded weak point raises weaknessOnGround; a grounded non-weak part with a
// positive damageCoef costs 0.1 * damageCoef. Any grounded part, weak or not,
// also triggers ConfirmLanding().
for (int bodyIndex = 0; bodyIndex < doyleBodies.Length; bodyIndex++)
{
    var body = doyleBodies[bodyIndex];
    if (!body.isGrounded)
    {
        continue;
    }

    if (body.isWeakness)
    {
        weaknessOnGround = true;
    }
    else if (body.damageCoef > 0f)
    {
        //===Train Stand===
        AddReward(-0.1f * body.damageCoef);
    }

    ConfirmLanding();
    // ConfirmArrived();
}

//大致來說
--1.獎勵抑制速度和角速度,並使用ForceSharping
--2.獎勵視線角度,並使用ForceSharping
--3.獎勵Spine和Root角度為前傾角度,並使用ForceSharping
--4.獎勵雙足接觸地面
--5.當符合靜立標準,會額外加分
--6.尾巴和劍可以觸地,但會扣分

實驗時間:
Step: 5e7
Time Elapsed: 74648s (20.73hr)

實驗結果:
實驗結果為成功

道爾可以非常有效率的調整面向,並進入不錯的靜立狀態

雖然
1.常常會用劍當拐杖,尤其靜立後期
2.訓練尚未抵達極限時間長度並進入循環領域,全部都會被Force Sharping淘汰
關於被Force Sharping淘汰的項目,根據觀測為 "全部"
其中以outAngularVelocity大約為5成
而各角度造成outDirection和outMotion大約為4.5成
outVelocity約為0.5成

這裡可以認為
1.劍當拐杖的處罰不夠重,或設計不夠好,例如可以利用Force Sharping讓靜立開始一段時間後劍不可觸地
2.訓練不足 或是 Force Sharping太嚴格,包括時間太嚴格或極限值太嚴格

但總之靜立目前作為面向調整的表現相當理想
可以看到影片最後將水月擊飛與水月自行移動,道爾修正面向的表現非常即時
雖然長時間靜立還是會崩潰,但目前要做為銜接跑動已十分足夠

因此下個實驗預計:
進行道爾跑動實驗

而有鑑於道爾出道的企劃時間考量和先前跑動實驗的結果

預計跑動實驗設計為
1.中度前傾
2.鼓勵大腿角度平行,並輕度Force Sharping
3.鼓勵手臂和劍的角度,但非常輕度Force Sharping

創作回應

更多創作