Kobold Sentinel Attack V2
實驗目標:
1.設法用武器用力地打擊目標
實驗設計:
1.任何弱點觸地皆失敗 (尾巴、武器和Calf並非弱點)
2.當武器對Player觸發OnCollisionEnter時,依撞擊衝量給予獎勵(上限為1)
// Reward for the weapon's collision with the Player: the collision impulse
// magnitude scaled by enterCoef, clamped to [0, 1].
// enterCoef = 0.05f in this experiment.
float hitReward = Mathf.Clamp01(collision.impulse.magnitude * enterCoef);
agent.AddReward(hitReward);
3.
// Experiment settings:
//   judge.endEpisode    = true
//   judge.episodeLength = 3.3f
//   weapon and tail are not treated as weaknesses
//   useClampReward      = true

// Two failure conditions share one handling path; they differ only in the
// penalty scale and in which judge counter is incremented.
// The height check is only evaluated when no weakness is on the ground.
bool weaknessFailure = weaknessOnGround;
bool heightFailure = !weaknessFailure && koboldRoot.localPosition.y < -1f;

if (weaknessFailure || heightFailure)
{
    if (inferenceMode)
    {
        // Inference: hand control over to the get-up brain.
        brainMode = BrainMode.GetUp;
        SetModel("KoboldGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        // === Train Attack ===
        // Failing before the target was hit and before the episode length has
        // elapsed adds a negative reward proportional to the remaining time.
        if (!hitTarget)
        {
            float survivedTime = Time.fixedTime - arrivedMoment;
            if (survivedTime < judge.episodeLength)
            {
                float penaltyScale = weaknessFailure ? 0.1f : 0.3f;
                AddReward((survivedTime - judge.episodeLength) * penaltyScale);
            }
        }

        if (weaknessFailure)
        {
            judge.outLife++;
        }
        else
        {
            judge.outY++;
        }

        judge.Reset();
        return;

        // === Train Other === (disabled in this experiment)
        // Instead of the block above, switch to the get-up brain exactly as
        // the inference path does:
        //   brainMode = BrainMode.GetUp;
        //   SetModel("KoboldGetUp", getUpBrain);
        //   behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
}
else if (hitTarget)
{
    // After the target has been hit, each step adds a reward based on the
    // target's smoothed velocity and angular velocity.
    //
    // Disabled variant: score the kobold's own motion instead, using
    //   myVelocity        = velocityBuffer.GetSmoothVal().magnitude
    //   myAngularVelocity = Vector3.Project(angularVelocityBuffer.GetSmoothVal(), Vector3.up).magnitude
    //   velocityCoef        = Mathf.InverseLerp(15f, 0f, myVelocity)
    //   angularVelocityCoef = Mathf.InverseLerp(0f, 30f, myAngularVelocity)
    float targetVelocity = targetVelocityBuffer.GetSmoothVal().magnitude;
    float targetAngularVelocity = targetAngularVelocityBuffer.GetSmoothVal().magnitude;

    // Normalise to [0, 1]: velocity over 0..20, angular velocity over 0..30.
    velocityCoef = Mathf.InverseLerp(0f, 20f, targetVelocity);
    float angularVelocityCoef = Mathf.InverseLerp(0f, 30f, targetAngularVelocity);

    lastReward = velocityCoef * 0.05f + angularVelocityCoef * 0.05f;
    totalReward += lastReward;
    AddReward(lastReward);
}
//大致來說
1.打擊得分變成僅限初擊
2.在打擊後,得分加上目標速度和角速度
實驗時間:
Step: 5e7
Time Elapsed: 127742s (35.48hr)
實驗結果:
實驗結果為失敗,狗頭人使用武器的意圖極弱
但是可以看到狗頭人有明顯的攻擊行為,就是靠兩隻腳的前踢來設法踢飛紅蓮
因此應該可以認為有兩個現象
1.踢腿比揮舞武器更有效和更容易
因為獎勵項目有紅蓮的速度,所以比起把紅蓮打倒在地,往上或水平方向擊飛更有優勢
另外踢腿可說是走路的一種變形
2.視覺的必要性
狗頭人沒有視覺,只能知道目標的座標,因此無法意識到紅蓮的肢體存在
這可能也造成打擊效果不好,例如很容易被卡住,因此撞擊顯得更為實際
另外目前觀察項只知道目標的座標,卻不知道目標的旋轉量
這可能也造成打擊很不切實際
由於還想再思考一下改進方向,同時也很在意SAC在此情境下的結果
因此下個實驗是
1.相同實驗,但使用SAC訓練