ETH官方钱包

前往
大廳
主題

狗頭人哨兵 追逐 9

夏洛爾 | 2022-12-11 21:41:23 | 巴幣 0 | 人氣 173


Kobold Sentinel Run V9

實驗目標:
1.進入靜立狀態後,進入追逐狀態,在追逐狀態下,要能持續跑至接近目標的距離內
2.動作引導(dǎo)為雙臂展開身體前傾的帥氣奔跑動作
3.尺寸非平均機率分配 (尺寸1出現機率為尺寸2的2.5倍)
4.Force Sharping有容錯範圍 (允許5秒內含總計2秒的失誤)

實驗設計:
1.任何弱點觸地皆失敗 (尾巴、武器和Calf並非弱點)
2.使用ClampReward
// Any body part reporting a positive damage coefficient lowers the clamp
// reward by 0.1 times that coefficient.
if (koboldBodies[i].damageCoef > 0f)
{
    clampReward -= 0.1f * koboldBodies[i].damageCoef;
}
3.
// Experiment settings for this run:
//   judge.endEpisode = false
//   nearModeRange = 1f
//   weapon and tail are not weaknesses (if they were, Stand would fall back to GetUp)
//   calf is not a weakness
if (weaknessOnGround)
{
    if (!inferenceMode)
    {
        // Training: a weakness on the ground costs -1 and resets the judge.
#if UNITY_EDITOR
        Debug.Log( "Killed by weaknessOnGround");
        LogWeaknessOnGround();
#endif
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
    }

    // Inference: switch to the get-up model instead of resetting.
    brainMode = BrainMode.GetUp;
    SetModel("KoboldGetUp", getUpBrain);
    behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else if (koboldRoot.localPosition.y < -10f)
{
    if (!inferenceMode)
    {
        // Training: root dropped below y = -10, costs -1 and resets the judge.
#if UNITY_EDITOR
        Debug.Log( "Killed by y");
#endif
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
    }

    // Inference: switch to the get-up model instead of resetting.
    brainMode = BrainMode.GetUp;
    SetModel("KoboldGetUp", getUpBrain);
    behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
    // Directions from the head and the root towards the smoothed target;
    // both body positions go through stageBase.InverseTransformPoint first.
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldRootRb.position);

    // Root-to-target direction with the vertical component removed.
    flatTargetVelocity = new Vector3(rootDir.x, 0f, rootDir.z);
    targetDistance = flatTargetVelocity.magnitude;

    // Unit run direction, the rotation looking along it, and the horizontal
    // axis perpendicular to it.
    Vector3 runDir = flatTargetVelocity.normalized;
    Quaternion runFrame = Quaternion.LookRotation(runDir, Vector3.up);
    Vector3 sideAxis = Vector3.Cross(flatTargetVelocity, Vector3.up);

    // Recommended velocity: the run direction with y forced to 0.2.
    aimVelocity = new Vector3(runDir.x, 0.2f, runDir.z);

    // Every "...Angle" score below is InverseLerp(180, 0, angle):
    // 1 when the two vectors are aligned, 0 when they are opposite.

    // Head: "up" towards the target, "forward" along the side axis (side look).
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.up, headDir));
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.forward, sideAxis));

    // Lean: spine and root are compared against the same leaning direction.
    Vector3 leanAim = runFrame * rootAimRot * Vector3.forward;
    spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(-koboldSpine.right, leanAim));
    rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRoot.up, leanAim));

    // Arms and weapon ("Naruto arm" pose): each part has its own aim rotation
    // expressed in the run frame.
    Vector3 leftUpperArmAim = runFrame * leftUpperArmAimRot * Vector3.forward;
    Vector3 leftForeArmAim = runFrame * leftForeArmAimRot * Vector3.forward;
    Vector3 rightUpperArmAim = runFrame * rightUpperArmAimRot * Vector3.forward;
    Vector3 rightForeArmAim = runFrame * rightForeArmAimRot * Vector3.forward;
    Vector3 weaponAim = runFrame * weaponAimRot * Vector3.forward;
    leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftUpperArm.right, leftUpperArmAim));
    leftForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftForeArm.right, leftForeArmAim));
    rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightUpperArm.right, rightUpperArmAim));
    rightForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightForeArm.right, rightForeArmAim));
    weaponAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldWeapon.up, weaponAim));

    // Tail segments are compared against the run direction.
    tailRootAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailRoot.right, runDir));
    tailMidAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailMid.right, runDir));
    tailTopAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailTop.right, runDir));

    // Thighs are compared against the side axis.
    leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(-koboldLeftThigh.forward, sideAxis));
    rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(-koboldRightThigh.forward, sideAxis));

    // Smoothed velocity versus the recommended velocity.
    avgVelocity = velocityBuffer.GetSmoothVal();
    velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
    velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
    flatVelocity = new Vector3(avgVelocity.x, 0f, avgVelocity.z);
    flatVelocityManitude = flatVelocity.magnitude;
    // Speed score: length of the projection onto aimVelocity, mapped from [0, 10] to [0, 1].
    velocityCoef = Mathf.InverseLerp(0f, 10f, Vector3.Project(avgVelocity, aimVelocity).magnitude);
    flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);

    // Penalties, resets and rewards are only applied while training.
    if (!inferenceMode)
    {
        if (targetDistance > nearModeRange)
        {
            float sinceLanding = Time.fixedTime - landingMoment;
            if (sinceLanding > landingBufferTime)
            {
                // Time elapsed since the landing buffer ran out. Mathf.Lerp clamps
                // its t argument, so each threshold below ramps from its first
                // value to its second over 4 seconds and then stays there.
                float rampTime = sinceLanding - landingBufferTime;
                float ramp = rampTime / 4f;

                float strictLimit = Mathf.Lerp(0f, 0.7f, ramp);
                float looseLimit = Mathf.Lerp(0f, 0.5f, ramp);
                // The reset threshold starts moving 2 seconds later and takes 6 seconds.
                float resetFloor = Mathf.Lerp(0f, sharpingResetThreshould, (rampTime - 2f) / 6f);

                bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f, ramp);
                bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, ramp);

                // Head and thighs use the stricter limit; spine, root, arms and
                // weapon use the looser one. Tail scores are not checked here.
                bool belowStrict = lookAngle < strictLimit
                    || upAngle < strictLimit
                    || leftThighAngle < strictLimit
                    || rightThighAngle < strictLimit;
                bool belowLoose = spineUpAngle < looseLimit
                    || rootUpAngle < looseLimit
                    || leftUpperArmAngle < looseLimit
                    || leftForeArmAngle < looseLimit
                    || rightUpperArmAngle < looseLimit
                    || rightForeArmAngle < looseLimit
                    || weaponAngle < looseLimit;
                bool outMotion = belowStrict || belowLoose;

                // Each violated category costs 0.05 of clamp reward and is counted by
                // the judge. A violation does not reset by itself (that path is
                // disabled); it only feeds the sharping buffer below.
                if (outSpeed)
                {
#if UNITY_EDITOR
                    Debug.Log("outSpeed");
#endif
                    clampReward -= 0.05f;
                    judge.outSpeed++;
                }

                if (outDirection)
                {
#if UNITY_EDITOR
                    Debug.Log("outDirection");
#endif
                    clampReward -= 0.05f;
                    judge.outDirection++;
                }

                if (outMotion)
                {
#if UNITY_EDITOR
                    Debug.Log("outMotion");
                    if (lookAngle < strictLimit) { Debug.Log("lookAngle < motionLimit2"); }
                    if (upAngle < strictLimit) { Debug.Log("upAngle < motionLimit2"); }
                    if (leftThighAngle < strictLimit) { Debug.Log("leftThighAngle < motionLimit2"); }
                    if (rightThighAngle < strictLimit) { Debug.Log("rightThighAngle < motionLimit2 "); }
                    if (spineUpAngle < looseLimit) { Debug.Log("spineUpAngle < motionLimit"); }
                    if (rootUpAngle < looseLimit) { Debug.Log("rootUpAngle < motionLimit"); }
                    if (leftUpperArmAngle < looseLimit) { Debug.Log("leftUpperArmAngle < motionLimit"); }
                    if (leftForeArmAngle < looseLimit) { Debug.Log("leftForeArmAngle < motionLimit"); }
                    if (rightUpperArmAngle < looseLimit) { Debug.Log("rightUpperArmAngle < motionLimit"); }
                    if (rightForeArmAngle < looseLimit) { Debug.Log("rightForeArmAngle < motionLimit"); }
                    if (weaponAngle < looseLimit) { Debug.Log("weaponAngle < motionLimit"); }
#endif
                    clampReward -= 0.05f;
                    judge.outMotion++;
                }

                // Record this step in the sharping buffer: -1 for any violation, 0 otherwise.
                bool anyViolation = outSpeed || outDirection || outMotion;
                sharpingBuffer.PushVal(anyViolation ? -1f : 0f);

#if UNITY_EDITOR
                sharpingVal = sharpingBuffer.GetSmoothVal();
#endif
                // Reset only when the smoothed violation value drops below the threshold.
                if (sharpingBuffer.GetSmoothVal() < resetFloor)
                {
#if UNITY_EDITOR
                    Debug.Log( "Killed by ForceSharping");
#endif
                    AddReward(-1f);
                    judge.Reset();
                    return;
                }
            }

            // Per-step reward: weighted sum of velocity, head, thigh, torso,
            // arm/weapon/tail and low-exertion terms (summed in this order).
            float velocityTerm = (velocityAngleCoef + velocityCoef) * 0.02f;
            float headTerm = (lookAngle + upAngle) * 0.0125f;
            float thighTerm = (leftThighAngle + rightThighAngle) * 0.0075f;
            float torsoTerm = (spineUpAngle + rootUpAngle) * 0.005f;
            float limbTerm = (leftUpperArmAngle + leftForeArmAngle + rightUpperArmAngle + rightForeArmAngle + weaponAngle + tailRootAngle + tailMidAngle + tailTopAngle) * 0.001f;
            float exertionTerm = (1f - exertionRatio) * 0.002f;
            lastReward = velocityTerm + headTerm + thighTerm + torsoTerm + limbTerm + exertionTerm;

            if (useClampReward)
            {
                // Apply accumulated penalties, but never go below -0.05 per step.
                lastReward += clampReward;
                if (lastReward < -0.05f)
                {
                    lastReward = -0.05f;
                }
            }

            totalReward += lastReward;
            AddReward(lastReward);
        }
        else
        {
            // Within nearModeRange of the target: count the run as survived and reset.
            judge.survived++;
            judge.Reset();
            return;
        }
    }
}

//大致來說,
--1.獎勵視線,並使用Force Sharping
--2.獎勵投影至"跑動推薦向量"的速度和角度,並使用Force Sharping
--3.獎勵Root、Spine、雙臂特定向量(forward/up/right)、武器符合指定角度,並使用Force Sharping
--4.獎勵尾巴全體符合指定角度,但"並不使用Force Sharping"
--5.獎勵大腿不能側開,並使用Force Sharping
--6.獎勵減少動作變化

4.Force Sharping改為有容錯空間,但是容許值逆向Sharping
允許角色在5秒內(nèi)發(fā)生總計2秒以內(nèi)的失誤,希望藉此讓角色就算輕微失衡也能嘗試自行修正
但是容許值是逆向Sharping,會在開始Force Sharping後兩秒才逐步放寬標準

實驗時間:
Step: 5e7
Time Elapsed: 58355s (16.21hr)

實驗結(jié)果:
實驗結(jié)果為成功,並且非常理想

基本上所有尺寸的狗頭人哨兵都能進行非常長距離的跑動
失衡也能自行修正

雖然還是有觀測到跌倒的案例,但是比例極低,可以認為就是增加訓練能改善的程度

呵哈哈哈,搞那麼久,原來是有Bug
總之終於練出理想的模型就是舒心

終於可以把狗頭人哨兵往下一個階段推進了

要說新的難題的話,就是其實還沒想好下一個階段具體要怎麼設(shè)計
主要是希望狗頭人哨兵學(xué)會揮舞武器並打擊目標(biāo)

但光是是否要先停在目標(biāo)前面,還是邊跑邊攻擊就有很多選項
總之跑動終於克服,而且事實也證明亂搞一通弄出Bug或盲點,反而只會浪費一堆時間

這次就花點時間冷靜的思考一下再開始吧

創作回應

更多創(chuàng)作