Wolf Run V2
實驗目標:
1.進入靜立狀態後,進入追逐狀態,在追逐狀態下,要能持續跑至接近目標的距離內
2.尺寸介於1-5倍
實驗設計:
1.任何弱點觸地皆失敗 (尾巴和四個小腿並非是弱點)
2.非弱點肢體
// Accumulate a clamp-reward penalty proportional to this body part's damage coefficient.
var damageCoef = wolfBodies[i].damageCoef;
if (damageCoef > 0f)
{
    clampReward += -0.01f * damageCoef;
}
3.
// Per-step failure checks and reward shaping for the chase (run) phase.
// This is a method-body fragment: it reads and writes agent fields declared elsewhere
// and returns early whenever the episode is reset through judge.Reset().
//Set: judge.endEpisode = false
//Set: useClampReward = true
//Set: SharpingBuffer Len=250 Th=-0.4
if (weaknessOnGround)
{
    if (inferenceMode)
    {
        // Inference: switch to the get-up model instead of ending the episode.
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        // Training: penalize, count the failure and reset.
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
    }
}
else if (wolfRoot.localPosition.y < -10f)
{
    // Root dropped below y = -10 in local space.
    if (inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
    }
}
else
{
    // Directions from head/root to the smoothed target, in stageBase local space.
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);

    // Horizontal (y = 0) vector from root to target; its length is the target distance.
    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;
    Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);

    // Every *Angle score below maps an angle of 180..0 degrees onto 0..1 (1 = aligned).
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, headDir));
    //SideUp
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.forward, flatLeftDir));

    // Recommended running direction: unit horizontal target direction with y set to 0.2.
    aimVelocity = flatTargetVelocity.normalized;
    aimVelocity.y = 0.2f;

    //SideUp
    spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));
    rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right * -1f, flatLeftDir));

    leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftThigh.forward * -1f, flatLeftDir));
    rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightThigh.forward * -1f, flatLeftDir));
    //For Sync run
    Vector3 leftThighUpDir = Vector3.ProjectOnPlane(wolfLeftThigh.right, flatLeftDir);
    Vector3 rightThighUpDir = Vector3.ProjectOnPlane(wolfRightThigh.right, flatLeftDir);
    float thighUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftThighUpDir, rightThighUpDir));

    leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftUpperArm.forward * -1f, flatLeftDir));
    rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightUpperArm.forward * -1f, flatLeftDir));
    //For Sync run
    Vector3 leftUpperArmUpDir = Vector3.ProjectOnPlane(wolfLeftUpperArm.right, flatLeftDir);
    Vector3 rightUpperArmUpDir = Vector3.ProjectOnPlane(wolfRightUpperArm.right, flatLeftDir);
    float upperArmUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftUpperArmUpDir, rightUpperArmUpDir));

    tailAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfTail.right, flatTargetVelocity));

    // Smoothed velocity compared against the recommended running direction.
    avgVelocity = velocityBuffer.GetSmoothVal();
    velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
    velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
    flatVelocity = avgVelocity;
    flatVelocity.y = 0f;
    flatVelocityManitude = flatVelocity.magnitude;

    // Speed thresholds scale from 1x to 2.5x as currentSize / 5 goes from 0 to 1.
    float sizeScale = Mathf.Lerp(1f, 2.5f, currentSize / 5f);
    velocityCoef = Mathf.InverseLerp(0f, 15f * sizeScale, Vector3.Project(avgVelocity, aimVelocity).magnitude);
    flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);

    if (!inferenceMode)
    {
        if (targetDistance > nearModeRange)
        {
            if (Time.fixedTime - landingMoment > landingBufferTime)
            {
                // Time since the landing buffer expired; every threshold below ramps on it.
                float sharpingElapsed = Time.fixedTime - landingMoment - landingBufferTime;

                bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f * sizeScale, sharpingElapsed / 4f);
                bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 30f, sharpingElapsed / 5f);
                float motionLimit = Mathf.Lerp(0f, 0.8f, sharpingElapsed / 3f);
                // Second motion limit starts at 0.3 rather than 0.
                float motionLimit2 = Mathf.Lerp(0.3f, 0.7f, sharpingElapsed / 3f);
                // Reset threshold stays at 0 for the first 2 seconds, then ramps over 5 seconds.
                float sharpingResetVal = Mathf.Lerp(0f, sharpingResetThreshould, (sharpingElapsed - 2f) / 5f);
                bool outMotion = lookAngle < motionLimit2
                    || upAngle < motionLimit2
                    || leftThighAngle < motionLimit2
                    || rightThighAngle < motionLimit2
                    || spineUpAngle < motionLimit
                    || rootUpAngle < motionLimit
                    || thighUpAngle < motionLimit2
                    || upperArmUpAngle < motionLimit2
                    || leftUpperArmAngle < motionLimit2
                    || rightUpperArmAngle < motionLimit2;

                if (outSpeed || outDirection || outMotion)
                {
                    // AddReward(-1f);
                    if (outSpeed)
                    {
                        #if UNITY_EDITOR
                        Debug.Log("outSpeed");
                        #endif
                        clampReward += -0.05f;
                        judge.outSpeed++;
                    }
                    if (outDirection)
                    {
                        #if UNITY_EDITOR
                        Debug.Log("outDirection");
                        #endif
                        clampReward += -0.05f;
                        judge.outDirection++;
                    }
                    if (outMotion)
                    {
                        #if UNITY_EDITOR
                        Debug.Log("outMotion");
                        #endif
                        clampReward += -0.05f;
                        judge.outMotion++;
                    }
                    // Record a violation for this step instead of resetting immediately.
                    sharpingBuffer.PushVal(-1f);
                    // judge.Reset();
                    // return;
                }
                else
                {
                    sharpingBuffer.PushVal(0f);
                }

                #if UNITY_EDITOR
                sharpingVal = sharpingBuffer.GetSmoothVal();
                #endif

                // Reset only once the smoothed violation value drops below the current threshold.
                if (sharpingBuffer.GetSmoothVal() < sharpingResetVal)
                {
                    AddReward(-1f);
                    judge.Reset();
                    return;
                }
            }

            lastReward = (velocityAngleCoef + velocityCoef) * 0.02f
                + (lookAngle + upAngle) * 0.01f
                + (leftThighAngle + rightThighAngle + leftUpperArmAngle + rightUpperArmAngle) * 0.005f
                + (spineUpAngle + rootUpAngle) * 0.005f
                + (tailAngle) * 0.005f
                + (thighUpAngle + upperArmUpAngle) * 0.005f
                + (1f - exertionRatio) * 0.005f;

            if (useClampReward)
            {
                // Add the accumulated penalties, then floor the step reward at -0.05.
                lastReward = lastReward + clampReward;
                if (lastReward < -0.05f)
                {
                    lastReward = -0.05f;
                }
            }
            totalReward += lastReward;
            AddReward(lastReward);
        }
        // else if(targetDistance > 1.5f)
        else
        {
            // Target is within nearModeRange: count the episode as survived and reset.
            // AddReward(1f);
            judge.survived++;
            judge.Reset();
            return;
        }
    }
}
//大致來說,
--1.獎勵視線,並使用Force Sharping
--2.獎勵投影至"跑動推薦向量"的速度和角度,並使用Force Sharping
--3.獎勵四個大腿的Side Look,並使用Force Sharping
--4.獎勵尾巴符合指定角度
--5.獎勵減少動作變化
--6.獎勵雙手和雙足要同步奔跑
--7.Motion相關的Force Sharping非從0開始
4.Force Sharping改為有容錯空間,但是容許值逆向Sharping
允許角色在5秒內發生總計2秒以內的失誤,希望藉此讓角色就算輕微失衡也能嘗試自行修正
但是容許值是逆向Sharping,會在開始Force Sharping後兩秒才逐步放寬標準
實驗時間:
Step: 5e7
Time Elapsed: 94401s (26.22hr)
實驗結果:
實驗結果為成功,但不理想
藉由強制雙手雙腳需要同步奔跑,狼有雙手雙腳同步奔跑了
但是問題有三
1.其實只有三隻腳
左手幾乎是沒有用上,只有偶爾才輔助
2.小尺寸奔跑能力較弱
目前看起來是因為大部分會因Out Of Speed被Force Sharping淘汰
這裡認為是非對稱問題,由於不希望體型大的狼跑太快,所以體型越大速度要求越低
但看來反而讓同動作,體型小的狼無法抵達目標速度,體型大才可以
但因為本實驗沒有設定極限時間,所以又發生訓練量不均勻的問題
3.狼會翹起來
不知為何身體曲線抬很高
檢查後也不覺得狼有前後長短腳,所以感覺是因為進入Gait
初期亂加速就很容易抬起來,然後因為也沒差,就索性一直抬著
另外感覺左右搖擺的情況還是很嚴重
因此下個實驗為狼追逐
1.引導身體要盡量平行地面
2.速度要求正比尺寸
3.提高ForceSharping的要求,尤其進入階段