厄啊啊啊...我是一個科學家,我會不帶入感情,寫下客觀冰冷的研究紀錄
Kobold Sentinel Run V2
實驗目標:
1.進入靜立狀態後,進入追逐狀態,在追逐狀態下,要能持續跑至接近目標的距離內
2.動作引導為雙臂展開身體前傾的帥氣奔跑動作
實驗設計:
1.任何弱點觸地皆失敗 (尾巴和劍並非弱點)
2.使用ClampReward
// Each body part with a positive damage coefficient lowers clampReward by 0.1 x that coefficient.
if (koboldBodies[i].damageCoef > 0f)
{
    clampReward -= 0.1f * koboldBodies[i].damageCoef;
}
3.
// Per-step reward and termination logic recorded for the "Run" experiment.
// Settings used for this run:
//   judge.endEpisode = false
//   nearModeRange = 1f
//   Weapon and tail are not weak points; if they were, Stand would fall back to GetUp.
if (weaknessOnGround)
{
    // LogWeaknessOnGround();
    if (inferenceMode)
    {
        // Inference: switch to the GetUp model; no penalty or reset is applied here.
        brainMode = BrainMode.GetUp;
        SetModel("KoboldGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        // Training: a weak point touching the ground fails the attempt.
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
    }
}
else if (koboldRoot.localPosition.y < -10f)
{
    // Root dropped below y = -10 in local space.
    if (inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("KoboldGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
    }
}
else
{
    // Target direction, measured in stageBase local space from the head and from the root.
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldRootRb.position);

    // Horizontal offset from root to target; its length is the distance used for the near-range check.
    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;

    // Every "*Angle" score below is InverseLerp(180, 0, angle): 1 when aligned, 0 when opposite.
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.up, headDir));
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.right * -1f, Vector3.up));

    // Recommended running direction: unit horizontal direction with y set to 0.2.
    aimVelocity = flatTargetVelocity.normalized;
    aimVelocity.y = 0.2f;

    //Lean
    Vector3 leanDir = rootAimRot * flatTargetVelocity;
    spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldSpine.right * -1f, leanDir));
    rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRoot.up, leanDir));

    //Naruto Arm
    Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
    leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftUpperArm.right, leftUpperArmAimRot * flatTargetVelocity));
    leftForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftForeArm.right, leftForeArmAimRot * flatTargetVelocity));
    rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightUpperArm.right, rightUpperArmAimRot * flatTargetVelocity));
    rightForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightForeArm.right, rightForeArmAimRot * flatTargetVelocity));
    weaponAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldWeapon.up, weaponAimRot * flatTargetVelocity));

    // Tail segments are scored against the horizontal target direction.
    tailRootAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailRoot.right, flatTargetVelocity));
    tailMidAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailMid.right, flatTargetVelocity));
    tailTopAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailTop.right, flatTargetVelocity));

    // Thighs are scored against the sideways direction (cross of target direction and world up).
    leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftThigh.forward * -1f, flatLeftDir));
    rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightThigh.forward * -1f, flatLeftDir));

    // Smoothed velocity: direction score against aimVelocity, and speed along it scaled over 0..8.
    avgVelocity = velocityBuffer.GetSmoothVal();
    velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
    velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
    flatVelocity = avgVelocity;
    flatVelocity.y = 0f;
    flatVelocityManitude = flatVelocity.magnitude;
    velocityCoef = Mathf.InverseLerp(0f, 8f, Vector3.Project(avgVelocity, aimVelocity).magnitude);
    flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);

    if (!inferenceMode)
    {
        if (targetDistance > nearModeRange)
        {
            if (Time.fixedTime - landingMoment > landingBufferTime)
            {
                // Progress of the threshold ramp: 0 at the end of the landing buffer, 1 four seconds later
                // (Mathf.Lerp clamps it, so the limits stay at their final values afterwards).
                float rampProgress = (Time.fixedTime - landingMoment - landingBufferTime) / 4f;

                bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f, rampProgress);
                bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, rampProgress);
                float motionLimit = Mathf.Lerp(0f, 0.5f, rampProgress);
                float motionLimit2 = Mathf.Lerp(0f, 0.7f, rampProgress);

                // Head and thighs use the stricter limit (0.7); spine, root and arms use 0.5.
                bool outMotion = lookAngle < motionLimit2
                    || upAngle < motionLimit2
                    || leftThighAngle < motionLimit2
                    || rightThighAngle < motionLimit2
                    || spineUpAngle < motionLimit
                    || rootUpAngle < motionLimit
                    || leftUpperArmAngle < motionLimit
                    || leftForeArmAngle < motionLimit
                    || rightUpperArmAngle < motionLimit
                    || rightForeArmAngle < motionLimit;
                // || weaponAngle < motionLimit;

                if (outSpeed || outDirection || outMotion)
                {
                    // Any violated limit fails the attempt; each cause is counted separately.
                    AddReward(-1f);
                    if (outSpeed)
                    {
                        judge.outSpeed++;
                    }
                    if (outDirection)
                    {
                        judge.outDirection++;
                    }
                    if (outMotion)
                    {
                        judge.outMotion++;
                    }
                    judge.Reset();
                    return;
                }
            }

            // Dense step reward: velocity terms weigh most, then head, thighs, spine/root,
            // then arms/weapon/tail, plus a small bonus for low exertion.
            lastReward = (velocityAngleCoef + velocityCoef) * 0.02f
                + (lookAngle + upAngle) * 0.0125f
                + (leftThighAngle + rightThighAngle) * 0.0075f
                + (spineUpAngle + rootUpAngle) * 0.005f
                + (leftUpperArmAngle + leftForeArmAngle + rightUpperArmAngle + rightForeArmAngle + weaponAngle + tailRootAngle + tailMidAngle + tailTopAngle) * 0.001f
                + (1f - exertionRatio) * 0.002f;

            if (useClampReward)
            {
                // Add clampReward, then floor the step reward at zero.
                lastReward += clampReward;
                if (lastReward < 0f)
                {
                    lastReward = 0f;
                }
            }

            totalReward += lastReward;
            AddReward(lastReward);
        }
        // else if(targetDistance > 1.5f)
        else
        {
            // Within nearModeRange of the target: count the attempt as survived and reset.
            // AddReward(1f);
            judge.survived++;
            judge.Reset();
            return;
        }
    }
}
//大致來說,
--1.獎勵視線,並使用Force Sharping
--2.獎勵投影至"跑動推薦向量"的速度和角度,並使用Force Sharping
--3.獎勵Root、Spine、雙臂特定向量(forward/up/right)符合指定角度,並使用Force Sharping
--4.獎勵尾巴全體符合指定角度,但"並不使用Force Sharping"
--5.獎勵減少動作變化
實驗時間:
Step: 5e7
Time Elapsed: 84024s (23.34hr)
實驗結果:
實驗結果為部分成功,部分失敗
狗頭人還是會無法持續奔跑,而且根據觀察體型越小越容易失敗
研究後發現
1.腳掌太薄,小腿護具碰撞框太大
這導致狗頭人哨兵可能是很理想的奔跑,卻會被判定成小腿觸地,根據Log資訊大多失衡屬於此類
大概跟體型有關的就是這點
看起來有可能是腳掌太薄,以致計算稍有延遲就會讓小腿接觸地面,或是因為小腿脛骨的護具很大,以致身體前傾時就可能刮到地面
2.大腿Force Sharping導致就算失衡也不允許修正,因為不允許側開
3.被武器或尾巴絆倒
不誘導武器的情況,狗頭人哨兵目前是傾向把武器拿在胸前,但晃動時敲到胸膛或腳都會導致失衡
而狗頭人奔跑時,也可能往後擺腿時,踢到自己的尾巴
關於這點搞不清楚,該判斷為身體能力不足,還是訓練數不足
厄啊啊啊,下個實驗
姑且是因為根據TensorBoard,其實訓練後段得分指數上升,所以想要數據就直接先跑了一個訓練步數1.5e8的新實驗
但如果體型小還是會有相同問題,那麼就有必要修正碰撞框,然後全部重頭練
尤其現在看起來,狗頭人哨兵沒有喜歡全身前傾奔跑的理由,很有可能就是脛骨護具會碰到地面
重練好麻煩啊啊啊啊啊~~~
好消息是
1.也許不用動到前兩個模型,因為前兩個模型大概也不喜歡讓小腿觸地,把脛骨碰撞框縮小,也許不會影響太大,另外真的要重練,其實加起來也就是兩天以內
2.總覺得可以趁機實驗一下,考慮目前起身到靜立是盡速切換式,也許可以實驗一次練兩個模型,這種迷你一條龍的訓練方式,雖然以目前來看未必有比較方便,不過仍然存在優化和進步的可能性
警告:
稍微試了一下,加厚腳掌和縮小脛骨碰撞框,小腿觸地的情況確實少很多
但是仍然不會穩定跑動,變成轉移到OutMotion
另外有觀測到OutSpeed,這裡發覺也許不同體型的紅蓮人偶,接受相同的速度Force Sharping是不對等的
但總之目前認為是訓練數不足的可能性變得更高,但這也說明可能不同尺寸並沒有恰當的被Localize