Kobold Sentinel Run V6
實驗目標:
1.進入靜立狀態後,進入追逐狀態,在追逐狀態下,要能持續跑至接近目標的距離內
2.動作引導為雙臂展開身體前傾的帥氣奔跑動作
3.尺寸非平均機率分配 (尺寸1出現機率為尺寸2的2.5倍)
4.Force Sharping有容錯範圍 (允許5秒內含總計2秒的失誤)
實驗設計:
1.任何弱點觸地皆失敗 (尾巴、武器和Calf並非弱點)
2.使用ClampReward
// Clamp penalty: a body part whose damageCoef is positive lowers clampReward
// by 0.1 times that coefficient. `i` is an index supplied by the surrounding
// code, which is not part of this excerpt.
if (koboldBodies[i].damageCoef > 0f)
{
    clampReward -= 0.1f * koboldBodies[i].damageCoef;
}
3.
// Per-step failure checks and reward for the run experiment (method-body excerpt;
// the enclosing method header is not part of these notes).
//
// Settings used for this run:
//Set: judge.endEpisode = false
//Set: nearModeRange = 1f
//Set: weapon, tail is not weakness. If is, Stand would back to GetUp
//Set: calf is not weakness
if (weaknessOnGround)
{
    // LogWeaknessOnGround();
    if (inferenceMode)
    {
        // Inference: hand control to the get-up model instead of ending the episode.
        brainMode = BrainMode.GetUp;
        SetModel("KoboldGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        // Training: a weak point touching the ground is a failure.
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
    }
}
else if (koboldRoot.localPosition.y < -10f)
{
    // Root dropped below y = -10 in local space.
    if (inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("KoboldGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
    }
}
else
{
    // ---- Target direction, measured from head and root in stageBase space ----
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldRootRb.position);

    // Horizontal offset from root to target; its length is the remaining distance.
    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;

    //Naruto Arm
    Vector3 leftOfTarget = Vector3.Cross(flatTargetVelocity, Vector3.up);

    // Every *Angle score below maps an angle of 180..0 degrees onto 0..1
    // (1 = perfectly aligned with the reference direction).
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.up, headDir));

    //Side Look
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.forward, leftOfTarget));

    // Recommended running direction: unit vector toward the target with y forced to 0.2.
    aimVelocity = flatTargetVelocity.normalized;
    aimVelocity.y = 0.2f;

    //Lean
    Vector3 leanAxis = rootAimRot * flatTargetVelocity;
    spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldSpine.right * -1f, leanAxis));
    rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRoot.up, leanAxis));

    // Arms and weapon: each part is compared with its own rotated copy of the target direction.
    leftUpperArmAngle = Mathf.InverseLerp(180f, 0f,
        Vector3.Angle(koboldLeftUpperArm.right, leftUpperArmAimRot * flatTargetVelocity));
    leftForeArmAngle = Mathf.InverseLerp(180f, 0f,
        Vector3.Angle(koboldLeftForeArm.right, leftForeArmAimRot * flatTargetVelocity));
    rightUpperArmAngle = Mathf.InverseLerp(180f, 0f,
        Vector3.Angle(koboldRightUpperArm.right, rightUpperArmAimRot * flatTargetVelocity));
    rightForeArmAngle = Mathf.InverseLerp(180f, 0f,
        Vector3.Angle(koboldRightForeArm.right, rightForeArmAimRot * flatTargetVelocity));
    weaponAngle = Mathf.InverseLerp(180f, 0f,
        Vector3.Angle(koboldWeapon.up, weaponAimRot * flatTargetVelocity));

    // Tail segments are compared with the unrotated target direction.
    tailRootAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailRoot.right, flatTargetVelocity));
    tailMidAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailMid.right, flatTargetVelocity));
    tailTopAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailTop.right, flatTargetVelocity));

    // Thighs are compared with the sideways axis.
    leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftThigh.forward * -1f, leftOfTarget));
    rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightThigh.forward * -1f, leftOfTarget));

    // ---- Velocity scores, based on the smoothed velocity ----
    avgVelocity = velocityBuffer.GetSmoothVal();
    velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
    velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);

    flatVelocity = avgVelocity;
    flatVelocity.y = 0f;
    flatVelocityManitude = flatVelocity.magnitude;

    // Speed along the recommended direction, scored over the 0..10 range.
    velocityCoef = Mathf.InverseLerp(0f, 10f, Vector3.Project(avgVelocity, aimVelocity).magnitude);
    flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);

    if (!inferenceMode)
    {
        if (targetDistance > nearModeRange)
        {
            if (Time.fixedTime - landingMoment > landingBufferTime)
            {
                // Time since the landing buffer expired. The limits below ramp over
                // the first 4 seconds; Mathf.Lerp clamps its interpolant to 0..1.
                float sinceBuffer = Time.fixedTime - landingMoment - landingBufferTime;
                float ramp = sinceBuffer / 4f;

                bool tooSlow = flatVelocityManitude < Mathf.Lerp(0f, 7f, ramp);
                bool offCourse = flatVelocityAngle > Mathf.Lerp(180f, 10f, ramp);
                float poseFloor = Mathf.Lerp(0f, 0.5f, ramp);
                float headAndThighFloor = Mathf.Lerp(0f, 0.7f, ramp);

                // Tolerance threshold: starts moving 2 seconds in and reaches
                // sharpingResetThreshould 6 seconds after that.
                float resetFloor = Mathf.Lerp(0f, sharpingResetThreshould, (sinceBuffer - 2f) / 6f);

                bool badPose =
                    lookAngle < headAndThighFloor
                    || upAngle < headAndThighFloor
                    || leftThighAngle < headAndThighFloor
                    || rightThighAngle < headAndThighFloor
                    || spineUpAngle < poseFloor
                    || rootUpAngle < poseFloor
                    || leftUpperArmAngle < poseFloor
                    || leftForeArmAngle < poseFloor
                    || rightUpperArmAngle < poseFloor
                    || rightForeArmAngle < poseFloor
                    || weaponAngle < poseFloor;

                if (tooSlow || offCourse || badPose)
                {
                    // AddReward(-1f);
                    if (tooSlow)
                    {
                        Debug.Log("outSpeed");
                        clampReward -= 0.03f;
                        judge.outSpeed++;
                    }

                    if (offCourse)
                    {
                        Debug.Log("outDirection");
                        clampReward -= 0.03f;
                        judge.outDirection++;
                    }

                    if (badPose)
                    {
                        Debug.Log("outMotion");
                        clampReward -= 0.02f;
                        judge.outMotion++;
                    }

                    // Record one violating step in the tolerance buffer.
                    sharpingBuffer.PushVal(-1f);
                    // judge.Reset();
                    // return;
                }
                else
                {
                    // Record one clean step.
                    sharpingBuffer.PushVal(0f);
                }

#if UNITY_EDITOR
                sharpingVal = sharpingBuffer.GetSmoothVal();
#endif
                // Debug.Log( sharpingBuffer.GetSmoothVal() );

                // End the episode once the smoothed violation value falls below the threshold.
                if (sharpingBuffer.GetSmoothVal() < resetFloor)
                {
                    // NOTE(review): sharpingVal is only refreshed inside the UNITY_EDITOR
                    // block above, so outside the editor this logs whatever the field last held.
                    Debug.Log("sharpingVal: " + sharpingVal);
                    Debug.Log("sharpingResetVal: " + resetFloor);
                    AddReward(-1f);
                    judge.Reset();
                    return;
                }
            }

            // Weighted step reward: velocity, head, thighs, spine/root, arms + weapon + tail, exertion.
            lastReward = (velocityAngleCoef + velocityCoef) * 0.02f
                + (lookAngle + upAngle) * 0.0125f
                + (leftThighAngle + rightThighAngle) * 0.0075f
                + (spineUpAngle + rootUpAngle) * 0.005f
                + (leftUpperArmAngle + leftForeArmAngle + rightUpperArmAngle + rightForeArmAngle
                    + weaponAngle + tailRootAngle + tailMidAngle + tailTopAngle) * 0.001f
                + (1f - exertionRatio) * 0.002f;

            if (useClampReward)
            {
                // Apply accumulated penalties, but never hand out a negative step reward.
                lastReward += clampReward;
                if (lastReward < 0f)
                {
                    lastReward = 0f;
                }
            }

            totalReward += lastReward;
            AddReward(lastReward);
        }
        // else if(targetDistance > 1.5f)
        else
        {
            // Within nearModeRange of the target: count the episode as survived and reset.
            // AddReward(1f);
            judge.survived++;
            judge.Reset();
            return;
        }
    }
}
//大致來說,
--1.獎勵視線,並使用Force Sharping
--2.獎勵投影至"跑動推薦向量"的速度和角度,並使用Force Sharping
--3.獎勵Root、Spine、雙臂特定向量(forward/up/right)符合指定角度,並使用Force Sharping
--4.獎勵尾巴全體符合指定角度,但"並不使用Force Sharping"
--5.獎勵減少動作變化
4.Force Sharping改為有容錯空間,但是容許值逆向Sharping
允許角色在5秒內發生總計2秒以內的失誤,希望藉此讓角色就算輕微失衡也能嘗試自行修正
但是容許值是逆向Sharping,會在開始Force Sharping後兩秒才逐步放寬標準
實驗時間:
Step: 5e7
Time Elapsed: 66620s (18.5hr)
實驗結果:
實驗結果為成功...不能這麼說
這次各尺寸平均不錯,尤其中間尺寸,但發現小尺寸有機會一直跑一直跌倒,但有時候測試又很不錯
然後終於發現了悲劇性的大問題
"我有自行指定武器重心,但在重設人物尺寸時,沒有連帶調整武器重心"
也就是武器重心會在第一個尺寸時設定 (而且這個尺寸是隨機的)在某個常數
然後不管武器如何縮放,Unity Rigidbody的Center of Mass指定後和比例無關
所以實際上不同尺寸的狗頭人,會覺得武器重心不同
這個大概就是導致前面全部的狗頭人跑步有各種不穩定的主因
然後也是導致狗頭人尺寸越大越容易穩定的理由,因為越大尺寸越容易覺得武器重心很近
首先觀察項以外,人物本身的肢體就會破壞Localize
然後最慘的是這個設計與管理失誤,導致COM其實每次運行都會微妙偏差
因此訓練模型和模擬環境對狗頭人哨兵來說應該實際上完全不同
然後運氣好如果偏差很小,就會突然又感覺不錯
不過總之狗頭人能跑動,而且幸好測試上這個問題目前看來並不用重練GetUp和Stand
而且逆向的容許值Sharping也被證實是有效的
不過總之好慘,真的是幸好有發現,尤其我還超級自作聰明
這個調整COM的腳本開始運行後會自動銷毀,所以一旦開始運行就無法觀測到
感覺目前就是因為COM有差會導致很難穩定奔跑
因此下個實驗是狗頭人哨兵追逐
1.移除武器的COM腳本