ETH官方钱包

前往
大廳
主題

狗頭人哨兵 攻擊 7

夏洛爾 | 2023-02-27 16:37:28 | 巴幣 0 | 人氣 164

Kobold Sentinel Attack V6

實驗目標:
1.設法用武器用力的打擊目標

實驗設計:
1.任何弱點觸地皆失敗 (尾巴、武器和Calf並非弱點)
2.當武器OnCollisionEnter Player
會傳送collision.impulse
//impulseRewardCoef = 0.05f
/// <summary>
/// Registers the first weapon hit on the target and rewards it.
/// The agent's smoothed velocity direction at the moment of the hit is stored in
/// <c>hitOnVelocity</c>; the reward is the magnitude of the impulse component lying
/// in the plane perpendicular to that direction, scaled by <c>impulseRewardCoef</c>.
/// Calls made while <c>hitTarget</c> is already set are ignored.
/// </summary>
/// <param name="impulse">Collision impulse passed in by the weapon's collision handler.</param>
public void HitWithWeapon(Vector3 impulse)
{
    // Only the first hit is scored; later hits do nothing until hitTarget is cleared.
    if (hitTarget)
    {
        return;
    }

    // Remember the direction the agent was moving in when the hit landed.
    avgVelocity = velocityBuffer.GetSmoothVal();
    hitOnVelocity = avgVelocity.normalized;

    // Strip the impulse component along the movement direction, keep the rest.
    Vector3 offAxisImpulse = Vector3.ProjectOnPlane(impulse, hitOnVelocity);
    float hitReward = offAxisImpulse.magnitude * impulseRewardCoef;

    lastReward += hitReward;
    totalReward += hitReward;
    AddReward(hitReward);

    // Timestamp of the hit; the step logic measures elapsed time from here.
    arrivedMoment = Time.fixedTime;
    hitTarget = true;
}
3.
//Set: judge.endEpisode = true
//Set: judge.episodeLength = 3.3f
//Set: weapon, tail not weakness
//Set: useClampReward = true

// Per-step failure handling and reward shaping for the attack behaviour.
// Three cases are evaluated in order:
//   1. a weak point touched the ground          -> fail
//   2. the root dropped below local y = -1      -> fail
//   3. otherwise                                 -> shaping reward (different before / after the hit)
if(weaknessOnGround)
{
if(inferenceMode)
{
// Inference: do not reset, hand control over to the get-up model instead.
brainMode = BrainMode.GetUp;
SetModel("KoboldGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
// ===Train Attack===
if(!hitTarget)
{
// Failed before ever hitting the target: add a penalty proportional to the
// time that was still left (the value is negative because survivedTime < episodeLength).
float survivedTime = Time.fixedTime - arrivedMoment;
if(survivedTime < judge.episodeLength )
{
AddReward( (survivedTime - judge.episodeLength) * 0.1f );
}
}
// Count this failure on the judge and reset through it, then stop processing this step.
judge.outLife++;
judge.Reset();
return;
// Disabled alternative kept by the author (labelled "Train Other"): switch to the get-up model instead of resetting.
//===Train Other===
// brainMode = BrainMode.GetUp;
// SetModel("KoboldGetUp", getUpBrain);
// behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
}
else if(koboldRoot.localPosition.y < -1f)
{
// Root is more than 1 unit below its parent's origin on the local Y axis
// (presumably the agent fell off the stage -- confirm against the scene setup).
if(inferenceMode)
{
// Inference: hand control over to the get-up model.
brainMode = BrainMode.GetUp;
SetModel("KoboldGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
// ===Train Attack===
if(!hitTarget)
{
// Same remaining-time penalty as the weak-point case, but with a 0.3 coefficient instead of 0.1.
float survivedTime = Time.fixedTime - arrivedMoment;
if(survivedTime < judge.episodeLength )
{
AddReward( (survivedTime - judge.episodeLength) * 0.3f );
}
}
// This failure type is counted separately (outY) before the reset.
judge.outY++;
judge.Reset();
return;
// Disabled alternative kept by the author (labelled "Train Other").
// ===Train Other===
// brainMode = BrainMode.GetUp;
// SetModel("KoboldGetUp", getUpBrain);
// behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
}
// The body-collision failure case below is disabled; it mirrors the weak-point branch.
/*else if( IsCollideWithBody() )
{
if(inferenceMode)
{
brainMode = BrainMode.GetUp;
SetModel("KoboldGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
// ===Train Attack===
if(!hitTarget)
{
float survivedTime = Time.fixedTime - arrivedMoment;
if(survivedTime < judge.episodeLength )
{
AddReward( (survivedTime - judge.episodeLength) * 0.1f );
}
}
judge.outLife++;
judge.Reset();
return;
//===Train Other===
// brainMode = BrainMode.GetUp;
// SetModel("KoboldGetUp", getUpBrain);
// behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
}*/
else
{
if(hitTarget)
{
// After the hit: constant 0.003 per step plus 0.05 times the magnitude of the target's
// smoothed velocity with the component along hitOnVelocity removed.
targetSmoothVelocity = targetVelocityBuffer.GetSmoothVal();
lastReward = 0.003f + Vector3.ProjectOnPlane(targetSmoothVelocity, hitOnVelocity).magnitude * 0.05f;
totalReward += lastReward;
AddReward(lastReward);
if(inferenceMode)
{
// Inference: once episodeLength has elapsed since arrivedMoment, clear the hit flag
// and switch to the get-up model.
if(hasArrived && Time.fixedTime - arrivedMoment >= judge.episodeLength)
{
hitTarget = false;
brainMode = BrainMode.GetUp;
SetModel("KoboldGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
}
}
else
{
// Before the hit: reward moving towards the target and spinning about the Y axis.
targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
// Vector from the root (converted into stageBase local space) to the smoothed target position.
// Assumes targetSmoothPosition is expressed in the same local space -- TODO confirm.
rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldRootRb.position);
// Aim direction: horizontal direction to the target, with y then set to 0.2
// (the vector is not re-normalized after that).
flatTargetVelocity = rootDir;
flatTargetVelocity.y = 0f;
aimVelocity = flatTargetVelocity.normalized;
aimVelocity.y = 0.2f;
avgVelocity = velocityBuffer.GetSmoothVal();
// 1 when the smoothed velocity points along aimVelocity, 0 when it points the opposite way.
velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
// Length of the velocity component along aimVelocity, mapped from 0..5 onto 0..1.
// NOTE(review): the magnitude is unsigned, so velocity pointing away from the target
// also yields a positive coefficient; only velocityAngleCoef discourages it -- confirm this is intended.
velocityCoef = Mathf.InverseLerp(0f, 5f, Vector3.Project(avgVelocity, aimVelocity).magnitude );
avgAngularVelocity = angularVelocityBuffer.GetSmoothVal();
// Only positive angular velocity about Y is rewarded; negative values score 0.
float angularVelocityScore = avgAngularVelocity.y > 0f ? avgAngularVelocity.y * 0.001f : 0f;
lastReward = velocityAngleCoef*0.001f + velocityCoef*0.001f + angularVelocityScore;
totalReward += lastReward;
AddReward(lastReward);
}
}

//大致來說
在命中目標時紀錄當下自身的移動方向
之後,獎勵和移動方向不一樣的敵人速度值

而命中目標前,獎勵往目標方向的角度和速度,然後獎勵自身的旋轉

實驗時間:
Step: 5e7
Time Elapsed: --s (--hr)

實驗結果:
實驗結果為沒有突破點的成功,因此算是失敗

狗頭人可以穩定攻擊位置不同的目標,但是感覺不出來有很大力
硬要說的話似乎有藉由"叩首"來增加向下打擊的力量

但還是不顯著,至少沒有蓄力的動作,命中前也沒有旋轉等現象

但是和朋友聊天後才發(fā)現(xiàn)我忘記了很重要的ML屬性--Stacked Vector
尤其Unity ML某個版本之後還可以只對某幾個觀察項進行Stacked

這個方向應(yīng)該極有可能讓狗頭人願意做蹲低跳高的行為,因為這種觀察方式就是直接包含連續(xù)動作
以前因為電腦不好會輕易造成效能瓶頸而延宕這方向的研究,但因為時間太長就忘記了!

因此狗頭人將進行全新系列的研究,將以Stacked為基礎,重新設計觀察項和全部的訓練
並且由於原本擔(dān)心狗頭人視野很難處理,不知道怎麼合適的加上視覺 (加很少有可能看不到,加很多可能讓效能爆炸),但如果已經(jīng)Stacked,那視覺應(yīng)該加一點即可

但為了命名上有區別,所以後續系列就叫做 "狗頭人哨兵隊長"
期勉我家狗頭人可以表現(xiàn)得更好
追蹤 創(chuàng)作集

作者相關創作

更多創(chuàng)作