ETH官方钱包

前往
大廳
主題

狼 追逐 V6

夏洛爾 | 2023-01-30 21:33:25 | 巴幣 0 | 人氣 127

Wolf Run V6

實驗目標:
1.進入靜立狀態後,進入追逐狀態,在追逐狀態下,要能持續跑至接近目標的距離內
2.尺寸介於1-5倍

實驗設計:
1.任何弱點觸地皆失敗 (尾巴和四個小腿並非是弱點)
2.非弱點肢體受損時扣分:
if(wolfBodies[i].damageCoef > 0f){clampReward += -0.01f * wolfBodies[i].damageCoef;}
3.
//Set: judge.endEpisode = true//Set: judge.episodeLength = 30f//Set: useClampReward = true//Set: SharpingBuffer Len=250 Th=-0.4if(weaknessOnGround){if(inferenceMode){brainMode = BrainMode.GetUp;SetModel("WolfGetUp", getUpBrain);behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;}else{AddReward(-1f);judge.outLife++;judge.Reset();return;}}else if(wolfRoot.localPosition.y < -10f){if(inferenceMode){brainMode = BrainMode.GetUp;SetModel("WolfGetUp", getUpBrain);behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;}else{AddReward(-1f);judge.outY++;judge.Reset();return;}}else{targetSmoothPosition = targetPositionBuffer.GetSmoothVal();headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);flatTargetVelocity = rootDir;flatTargetVelocity.y = 0f;targetDistance = flatTargetVelocity.magnitude;Vector3 forwardDir = flatTargetVelocity.normalized;Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, headDir));//SideUpupAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.forward, flatLeftDir));aimVelocity = flatTargetVelocity.normalized;aimVelocity.y = 0.1f;spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.up*-1f, Vector3.up));//SideLookspineLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.up, Vector3.up));//SideLookrootLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right*-1f, flatLeftDir));leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftThigh.forward * -1f, flatLeftDir));rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightThigh.forward * -1f, flatLeftDir));//For Sync runVector3 leftThighUpDir = Vector3.ProjectOnPlane(wolfLeftThigh.right, flatLeftDir);Vector3 rightThighUpDir = 
Vector3.ProjectOnPlane(wolfRightThigh.right, flatLeftDir);float thighUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftThighUpDir, rightThighUpDir));leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftUpperArm.forward * -1f, flatLeftDir));rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightUpperArm.forward * -1f, flatLeftDir));//For Sync runVector3 leftUpperArmUpDir = Vector3.ProjectOnPlane(wolfLeftUpperArm.right, flatLeftDir);Vector3 rightUpperArmUpDir = Vector3.ProjectOnPlane(wolfRightUpperArm.right, flatLeftDir);float upperArmUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftUpperArmUpDir, rightUpperArmUpDir));tailAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfTail.right, flatTargetVelocity));avgVelocity = velocityBuffer.GetSmoothVal();velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);flatVelocity = avgVelocity;flatVelocity.y = 0f;flatVelocityManitude = flatVelocity.magnitude;velocityCoef = Mathf.InverseLerp(0f, 15f*currentSize, Vector3.Project(avgVelocity, aimVelocity).magnitude );flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);if(!inferenceMode){if(targetDistance > nearModeRange){if(Time.fixedTime - landingMoment > landingBufferTime){bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 5f*currentSize, (Time.fixedTime - landingMoment - landingBufferTime)/15f);bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);float motionLimit = Mathf.Lerp(0.5f, 0.8f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);float motionLimit3 = Mathf.Lerp(0.5f, 0.8f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);float motionLimit2 = Mathf.Lerp(0f, 0.7f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);float sharpingResetVal = Mathf.Lerp(0f, sharpingResetThreshould, (Time.fixedTime - landingMoment - landingBufferTime - 2f)/5f);bool outMotion = 
lookAngle < motionLimit || upAngle < motionLimit || leftThighAngle < motionLimit2 || rightThighAngle < motionLimit2 || spineLookAngle < motionLimit || rootLookAngle < motionLimit // || spineUpAngle < motionLimit3 // || rootUpAngle < motionLimit3 || thighUpAngle < motionLimit2 || upperArmUpAngle < motionLimit2 || leftUpperArmAngle < motionLimit2 || rightUpperArmAngle < motionLimit2;if( outSpeed || outDirection || outMotion){// AddReward(-1f);if(outSpeed){#if UNITY_EDITORDebug.Log("outSpeed");#endifclampReward += -0.1f;judge.outSpeed++;}if(outDirection){#if UNITY_EDITORDebug.Log("outDirection");#endifclampReward += -0.1f;judge.outDirection++;}if(outMotion){#if UNITY_EDITORDebug.Log("outMotion");#endifclampReward += -0.1f;judge.outMotion++;}sharpingBuffer.PushVal(-1f);// judge.Reset();// return;}else{sharpingBuffer.PushVal(0f);}#if UNITY_EDITORsharpingVal = sharpingBuffer.GetSmoothVal();#endifif( sharpingBuffer.GetSmoothVal() < sharpingResetVal){AddReward(-1f);judge.Reset();return;}}if(IsOverSteps()){judge.Reset();return;}bool isFalling = avgVelocity.y < 0f;if(isFalling){lastReward = 0f;}else{lastReward = (velocityAngleCoef + velocityCoef) * 0.02f + (lookAngle+upAngle) * 0.01f + (leftThighAngle+rightThighAngle+leftUpperArmAngle+rightUpperArmAngle) * 0.0025f+ (spineLookAngle+rootLookAngle+spineUpAngle+rootUpAngle) * 0.005f+ (tailAngle) * 0.005f+ (thighUpAngle + upperArmUpAngle) * 0.005f+ (1f - exertionRatio) * 0.005f;if(useClampReward){lastReward = lastReward+clampReward;if(lastReward < -0.5f) lastReward = -0.5f;}totalReward += lastReward;AddReward( lastReward );}}// else if(targetDistance > 1.5f)else{// AddReward(1f);judge.survived++;judge.Reset();return;}}}

//大致來說,
--1.獎勵視線,並使用Force Sharping
--2.獎勵投影至"跑動推薦向量"的速度和角度,並使用Force Sharping
--3.獎勵四個大腿的Side Look,並使用Force Sharping
--4.獎勵尾巴符合指定角度
--5.獎勵減少動作變化
--6.獎勵雙手和雙足要同步奔跑
--7.Motion相關的Force Sharping非從0開始
--8.引導身體要盡量平行地面
--9.速度要求正比尺寸
--10.提高ForceSharping的要求,尤其進入階段

4.Force Sharping改為有容錯空間,但是容許值逆向Sharping
允許角色在5秒內發生總計2秒以內的失誤,希望藉此讓角色就算輕微失衡也能嘗試自行修正
但是容許值是逆向Sharping,會在開始Force Sharping後兩秒才逐步放寬標準

5.四腳需輪流著地

6.只有上昇過程可以得分,以避免狼試圖滑翔然後墜機

實驗時間:
Step: 5e8
Time Elapsed: 101822s (28.28hr)

實驗結果:
實驗結果為失敗

狼變成移動緩慢的小笨狗

是不是陷入矛盾了呢?

不使用速度Force Sharping,狼就不會採取加速Gait
但一旦使用速度Force Sharping,四腳誘導就會導致被淘汰
但一旦取消四腳誘導,狼就會變雙足奔跑

有幾個靈感
1.放寬四足奔跑
目前看起來四足奔跑不利起步
但是加速後的確很強

所以
1A: 加速後才啟用四足奔跑
但非連續性的問題是很容易讓PPO產生Gait後無所適從

1B: 放寬四足奔跑步數差距
目前是四足步數不能超過2步,但也許可以放寬,讓其短程調整可以,但長程還是得四足
但一樣有Gait問題的可能

1C: 四足奔跑是時段性
要和Buffer一樣,是一段時間內的紀錄
這樣就算踩出調整的一兩步,也不會因永久前科而變成無法調整

2.取消雙腳同步誘導
原本是想讓其同步奔跑,但有可能這個誘導和四足誘導交互起來導致狼窒礙難行

3.調整肌力
目前有狼很容易翹起來的現象,但理由不明,有可能是肌力比例不對

2看起來很有試試看的價值

因此下個實驗
1.恢復速度Force Sharping
2.取消雙腳同步誘導

創作回應

更多創作