1. Dependency Installation
pip install ai2thor
pip install numpy pillow opencv-python
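The later steps also rely on packages not in the list above: keyboard drives the manual control in step 3, and SpeechRecognition (plus PyAudio for microphone input) drives step 7. Installing them up front avoids import errors:
pip install keyboard SpeechRecognition pyaudio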
2. Verifying the Installation
from ai2thor.controller import Controller

controller = Controller(scene="FloorPlan1")
controller.step(action="MoveAhead")
print("Success!")
3. Manual Keyboard Control
from ai2thor.controller import Controller
import keyboard

controller = Controller(scene="FloorPlan1", gridSize=0.25, rotateStepDegrees=45)
print("WASD to move, Q/E to rotate, F to pick up, ESC to quit")

while True:
    event = controller.step(action="Pass")
    if keyboard.is_pressed('esc'):
        break
    elif keyboard.is_pressed('w'):
        event = controller.step(action="MoveAhead")
    elif keyboard.is_pressed('s'):
        event = controller.step(action="MoveBack")
    elif keyboard.is_pressed('a'):
        event = controller.step(action="MoveLeft")
    elif keyboard.is_pressed('d'):
        event = controller.step(action="MoveRight")
    elif keyboard.is_pressed('q'):
        event = controller.step(action="RotateLeft")
    elif keyboard.is_pressed('e'):
        event = controller.step(action="RotateRight")
    elif keyboard.is_pressed('f'):
        # Pick up the first object currently visible to the agent
        objects = [obj for obj in event.metadata["objects"] if obj["visible"]]
        if objects:
            event = controller.step(action="PickupObject",
                                    objectId=objects[0]["objectId"],
                                    forceAction=True)

controller.stop()
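Since opencv-python was installed in step 1, it can be handy to dump the current camera view while driving the agent around. A minimal sketch (the 'p' key binding is my own choice, not from the original) that converts event.frame from RGB to BGR before writing to disk:

import cv2

def save_view(event, path="view.png"):
    # event.frame is RGB; OpenCV expects BGR when writing image files
    bgr = cv2.cvtColor(event.frame, cv2.COLOR_RGB2BGR)
    cv2.imwrite(path, bgr)

# e.g. add another branch inside the loop above:
# elif keyboard.is_pressed('p'):
#     save_view(event)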
4. Locating a Target Object (Microwave Example)
from ai2thor.controller import Controller

controller = Controller(
    scene="FloorPlan1",
    visibilityDistance=1.5,
    renderInstanceSegmentation=True,
)

microwaves = [obj for obj in controller.last_event.metadata["objects"]
              if obj["objectType"] == "Microwave"]
if microwaves:
    target_id = microwaves[0]["objectId"]
    controller.step(action="MoveToObject", objectId=target_id, moveMagnitude=0.5)
    event = controller.step(action="PickupObject", objectId=target_id)
    if event.metadata["lastActionSuccess"]:
        print("Pickup succeeded!")
    else:
        print("Pickup failed:", event.metadata["errorMessage"])
else:
    print("No microwave in this scene")

controller.stop()
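Note that large appliances such as the microwave are generally openable receptacles in AI2-THOR rather than pickupable items, so the PickupObject call above may fail. A minimal sketch, reusing controller and target_id from the block above (run it before controller.stop()), that opens and closes the appliance instead:

# Open and then close the microwave instead of trying to pick it up
event = controller.step(action="OpenObject", objectId=target_id)
if event.metadata["lastActionSuccess"]:
    controller.step(action="CloseObject", objectId=target_id)
else:
    print("Open failed:", event.metadata["errorMessage"])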
5. Multi-Room Navigation
import random

controller.reset(scene="FloorPlan201")

# Query every position the agent can stand on in this scene
event = controller.step(action="GetReachablePositions")
reachable_positions = event.metadata["actionReturn"]

# Pick a random reachable position and move there
target_pos = random.choice(reachable_positions)
controller.step(action="MoveTo", position=target_pos, forceAction=True)
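The documented Teleport action can serve the same purpose with an explicit target position. A minimal sketch, reusing reachable_positions from above, that visits a few sampled positions and reports whether each move landed:

import random

# Sample a few reachable positions and check each result
for target_pos in random.sample(reachable_positions, k=min(3, len(reachable_positions))):
    event = controller.step(action="Teleport", position=target_pos)
    print(target_pos, "->", event.metadata["lastActionSuccess"])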
6. Vision-Assisted Grasping
event = controller.step(action="Pass")

# Object poses come from the event metadata; the rendered segmentation image
# itself is available separately as event.instance_segmentation_frame
for obj in event.metadata["objects"]:
    if obj["objectType"] == "Mug":
        center_x = obj["axisAlignedBoundingBox"]["center"]["x"]
        center_z = obj["axisAlignedBoundingBox"]["center"]["z"]
        controller.step(
            action="MoveTo",
            position=dict(x=center_x, y=0, z=center_z),
            forceAction=True,
        )
        controller.step(action="PickupObject", objectId=obj["objectId"])
        break
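To use the segmentation output directly, the event exposes per-object pixel masks when renderInstanceSegmentation=True. A sketch, assuming event.instance_masks maps each objectId to a boolean mask the size of the frame, that locates where the mug appears in the image:

import numpy as np

mug_ids = [obj["objectId"] for obj in event.metadata["objects"]
           if obj["objectType"] == "Mug"]
if mug_ids and mug_ids[0] in event.instance_masks:
    mask = event.instance_masks[mug_ids[0]]   # boolean mask, same size as the frame
    ys, xs = np.nonzero(mask)
    # Pixel centroid of the mug in the current camera view
    print("Mug centroid (row, col):", ys.mean(), xs.mean())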
7. Voice Control
import speech_recognition as sr

r = sr.Recognizer()
with sr.Microphone() as source:
    print("Please speak a command:")
    audio = r.listen(source)
command = r.recognize_google(audio, language='zh-CN')
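The recognized text can then be mapped onto simulator actions. A minimal sketch with a hypothetical keyword table (these Chinese keywords are illustrative, not from the original), assuming command and controller from the code above; in a robust script the recognize_google call should also be wrapped in try/except for sr.UnknownValueError and sr.RequestError:

# Hypothetical keyword table mapping spoken Chinese commands to AI2-THOR actions
COMMANDS = {"前进": "MoveAhead", "后退": "MoveBack",
            "左转": "RotateLeft", "右转": "RotateRight"}

for keyword, action in COMMANDS.items():
    if keyword in command:
        controller.step(action=action)
        break
else:
    print("No known keyword in:", command)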