diff --git a/config/include/ranger_test.yml b/config/include/ranger_test.yml index 0cd0f0f..e7f73e0 100644 --- a/config/include/ranger_test.yml +++ b/config/include/ranger_test.yml @@ -17,6 +17,7 @@ skill: - semantic_map - test_skill - pointcloud + - llm brain: - test_brian diff --git a/examples/demo2/ranger.action b/examples/demo2/ranger.action index e7aae61..6378ccc 100644 --- a/examples/demo2/ranger.action +++ b/examples/demo2/ranger.action @@ -22,6 +22,7 @@ def test_ranger(ranger_path: EntityPath) -> EOS_TYPE_ActionResult: x, y, yaw = result action_print(f"ranger pose before move: x={x}, y={y}, yaw={yaw}") + return EOS_TYPE_ActionResult.SUCCESS # move forward 1 meter diff --git a/examples/demo6_findchair/findchair.action b/examples/demo6_findchair/findchair.action new file mode 100644 index 0000000..5671c03 --- /dev/null +++ b/examples/demo6_findchair/findchair.action @@ -0,0 +1,125 @@ +from robonix.uapi.specs.skill_specs import EntityPath +from robonix.uapi.runtime.action import action, EOS_TYPE_ActionResult, get_runtime, action_print, action_info + +def extract_llm_response(llm_response: str): + import json + import re + """Extract action code and parameters from LLM response""" + try: + # Find JSON code block - updated regex to handle multi-line JSON properly + json_pattern = r"```json\s*\n(.*?)\n```" + json_match = re.search(json_pattern, llm_response, re.DOTALL) + + if json_match: + json_str = json_match.group(1).strip() + action_info(f"Extracted JSON string: {json_str}") + parsed_response = json.loads(json_str) + + return parsed_response + else: + action_info("No JSON format code block found in LLM response") + action_info(f"LLM response content: {llm_response}") + return None + + except Exception as e: + action_info(f"Failed to parse LLM response: {str(e)}") + action_info(f"LLM response content: {llm_response}") + return None +@action +def find_chair(ranger_path: EntityPath, server_pc_path: EntityPath) -> EOS_TYPE_ActionResult: + runtime = get_runtime() + ranger = runtime.get_graph().get_entity_by_path(ranger_path) + + if ranger is None: + action_print(f"ranger not found at path: {ranger_path}") + return EOS_TYPE_ActionResult.FAILURE + + # patrolling logic + action_info("Patrolling...") + + action_print("getting current pose") + + current_pose = ranger.cap_get_pose() + + if current_pose is None: + action_print("failed to get current pose") + return EOS_TYPE_ActionResult.FAILURE + x, y, yaw = current_pose + + action_print(f"current pose: x={x}, y={y}, yaw={yaw}") + + result = ranger.cap_pointcloud_to_file( + filename="./examples/demo2/pointcloud.ply") + if not result["success"]: + action_print("failed to save pointcloud to file") + return EOS_TYPE_ActionResult.FAILURE + action_print(f"pointcloud saved to file: success={result['success']}, points_collected={result['points_collected']}") + + model_bytes = open("./examples/demo2/pointcloud.ply", "rb").read() + + # then use skl_spatiallm_detect to detect the object + server_pc = get_runtime().get_graph().get_entity_by_path(server_pc_path) + + retry_count = 3 + + chair_pose = None + + while retry_count > 0: + try: + result = server_pc.skl_spatiallm_detect(ply_model=model_bytes) + + action_print(f"object detected: {result}") + + pose_res = ranger.skl_spatiallm_to_world_pose(spatiallm_txt=result.txt) + if pose_res is None: + action_print("failed to get world pose") + return EOS_TYPE_ActionResult.FAILURE + action_print(f"world pose: {pose_res}") + + request = f"all object pos are {pose_res}, please give me the dining chair position" + chair_pose = ranger.skl_ask_llm(api_key="sk-a93a0c965ad5490ea147db69300fd565", request=request) + + action_print(f"dining chair response: {chair_pose}") + chair_pose = extract_llm_response(chair_pose) + action_print(f"dining chair pose real: {chair_pose}") + + chair_pose_debug = None + for box in pose_res.bboxes: + if box.class_name == "dining_chair" or box.class_name == "chair": + chair_pose_debug = [box.world_center_x, box.world_center_y, box.world_center_z] + break + + action_print(f"dining chair pose debug: {chair_pose_debug}") + except Exception as e: + action_print(f"failed to detect object: {e}") + + if chair_pose is None: + action_print("dining chair not found") + if retry_count == 0: + action_print("dining chair not found") + return EOS_TYPE_ActionResult.FAILURE + # return EOS_TYPE_ActionResult.FAILURE + else: + break + action_print(f"retrying {retry_count}...") + retry_count -= 1 + + action_print(f"dining chair pose: x={chair_pose[0]}, y={chair_pose[1]}, yaw={chair_pose[2]}") + + action_print("moving...") + + # result = ranger.skl_move_to_ab_pos(x=chair_pose[0], y=chair_pose[1], yaw=0) + + # if result is None or result == "": + # action_print("failed to start movement") + # return EOS_TYPE_ActionResult.FAILURE + + action_print(f"movement command sent successfully: {result}") + action_print("find chair completed") + + return EOS_TYPE_ActionResult.SUCCESS + +''' +class_name='sofa', position_x=-2.191, position_y=-1.9520000000000002, position_z=0.03200000000000003, angle_z=-1.5708000000000002, scale_x=2.0, scale_y=0.828125, scale_z=1.03125, world_center_x=26.561971597607506, world_center_y=1.2528635693460008, world_center_z=0.03200000000000003) +class_name='dining_chair', position_x=-2.049, position_y=0.19799999999999995, position_z=-0.11599999999999999, angle_z=-1.5708000000000002, scale_x=0.640625, scale_y=0.6875, scale_z=0.984375, world_center_x=28.716615699389337, world_center_y=1.2397184312044487, world_center_z=-0.11599999999999999) +''' \ No newline at end of file diff --git a/examples/demo6_findchair/findchair.py b/examples/demo6_findchair/findchair.py new file mode 100644 index 0000000..8050bed --- /dev/null +++ b/examples/demo6_findchair/findchair.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +import sys +import os +import argparse +from pathlib import Path + +project_root = Path(__file__).parent.parent.parent +sys.path.insert(0, str(project_root)) + +project_root_parent = Path( + __file__ +).parent.parent.parent.parent # robonix root +sys.path.insert(0, str(project_root_parent)) + +from robonix.uapi import get_runtime, set_runtime +from robonix.manager.log import logger, set_log_level + +from robonix.skill import * + +set_log_level("debug") + +def init_skill_providers(runtime): + """Initialize skill providers for ranger demo""" + from robonix.uapi.runtime.provider import SkillProvider + + # dump __all__ in robonix.skill to skills list + try: + from robonix.skill import __all__ + skills = __all__ + except ImportError: + logger.warning("robonix.skill module not available") + skills = [] + + local_provider = SkillProvider( + name="local_provider", + IP="127.0.0.1", + skills=skills, + ) + + runtime.registry.add_provider(local_provider) + logger.info(f"Added skill providers: {runtime.registry}") + rtx5090server_provider = SkillProvider( + name="rtx5090server_provider", + IP="162.105.88.184", + port=50051, + skills=["skl_spatiallm_detect"], + ) + + runtime.registry.add_provider(rtx5090server_provider) + logger.info("Skill providers registered successfully") + runtime.dump_registry() + +def create_ranger_entity_builder(): + """Create a ranger-specific entity graph builder""" + def builder(runtime, **kwargs): + from robonix.uapi.graph.entity import create_root_room, create_controllable_entity + + root_room = create_root_room() + runtime.set_graph(root_room) + + ranger = create_controllable_entity("ranger") + root_room.add_child(ranger) + + ranger.bind_skill("cap_get_pose", get_pose) + ranger.bind_skill("cap_set_goal", simple_set_goal) + ranger.bind_skill("skl_move_to_rel_pos", move_to_rel_pos) + ranger.bind_skill("skl_move_to_ab_pos", move_to_ab_pos) + ranger.bind_skill("cap_pointcloud_to_file", cap_pointcloud_to_file) + ranger.bind_skill("skl_ask_llm", skl_ask_llm, provider_name="local_provider") + + logger.info("Ranger entity graph initialized:") + logger.info(f" root room: {root_room.get_absolute_path()}") + logger.info(f" ranger: {ranger.get_absolute_path()}") + ranger.bind_skill("cap_pointcloud_to_file", + cap_pointcloud_to_file, provider_name="local_provider") + + server_pc = create_controllable_entity("server_pc") + root_room.add_child(server_pc) + + server_pc.bind_skill("skl_spatiallm_detect", + __rpc_skl_spatiallm_detect, + provider_name="rtx5090server_provider") + ranger.bind_skill("skl_spatiallm_to_world_pose", skl_spatiallm_to_world_pose, provider_name="local_provider") + + return builder + + +def main(): + parser = argparse.ArgumentParser(description="Find Chair Demo") + parser.add_argument( + "--export-scene", + type=str, + help="Export scene information to JSON file", + ) + args = parser.parse_args() + + logger.info("Starting find_chair demo") + + runtime = get_runtime() + runtime.register_entity_builder("ranger", create_ranger_entity_builder()) + init_skill_providers(runtime) + runtime.build_entity_graph("ranger") + set_runtime(runtime) + runtime.print_entity_tree() + if args.export_scene: + scene_info = runtime.export_scene_info(args.export_scene) + logger.info(f"Scene information exported to: {args.export_scene}") + + action_program_path = os.path.join( + os.path.dirname(__file__), "findchair.action") + logger.info(f"Loading action program from: {action_program_path}") + + try: + action_names = runtime.load_action_program(action_program_path) + logger.info(f"Loaded action functions: {action_names}") + + runtime.configure_action("find_chair", ranger_path="/ranger", server_pc_path="/server_pc") + + logger.info("Executing find_chair action...") + thread = runtime.start_action("find_chair") + result = runtime.wait_for_action("find_chair", timeout=90.0) + + logger.info(f"find_chair completed with result: {result}") + logger.info("Demo completed successfully") + return 0 + + except Exception as e: + logger.error(f"Demo failed: {str(e)}", exc_info=True) + return 1 + + +if __name__ == "__main__": + exit_code = main() + sys.exit(exit_code) diff --git a/robonix/capability/nav2_webots_tiago/plugins/ros2_navigation/lib.py b/robonix/capability/nav2_webots_tiago/plugins/ros2_navigation/lib.py index 92b58d4..128f638 100644 --- a/robonix/capability/nav2_webots_tiago/plugins/ros2_navigation/lib.py +++ b/robonix/capability/nav2_webots_tiago/plugins/ros2_navigation/lib.py @@ -74,6 +74,6 @@ def set_goal(x, y, yaw) -> str: return func_status if __name__ == "__main__": - rclpy.init() + # rclpy.init() nv_controller = NavWithUltrasonicSafety() diff --git a/robonix/skill/llm/__init__.py b/robonix/skill/llm/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/robonix/skill/llm/api/__init__.py b/robonix/skill/llm/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/robonix/skill/llm/api/api.py b/robonix/skill/llm/api/api.py new file mode 100644 index 0000000..00a54af --- /dev/null +++ b/robonix/skill/llm/api/api.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import os +from mcp.server.fastmcp import FastMCP + +import rclpy +from rclpy.node import Node +from geometry_msgs.msg import PoseStamped +from nav2_simple_commander.robot_navigator import BasicNavigator, TaskResult +from sensor_msgs.msg import Range +import sys + +import sys +root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) +if root_dir not in sys.path: + sys.path.append(root_dir) +print(root_dir) +from robonix.manager.eaios_decorators import eaios + +import requests +import json + +@eaios.caller +def skl_ask_llm(self_entity, api_key, prompt=None, request=""): + """ + Query DeepSeek API with the given parameters + + Parameters: + api_key: DeepSeek API key for authentication + prompt: Context/content for the conversation, if None read from prompt.txt + request: User's question or request + + Returns: + API response content as string + """ + + # If prompt is empty, read context from prompt.txt + if prompt is None: + try: + prompt_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'config/prompt.txt') + with open(prompt_path, 'r', encoding='utf-8') as f: + prompt = f.read() + except FileNotFoundError: + print("Warning: prompt.txt file not found, using empty context") + prompt = "" + + # Construct messages list for the API + messages = [] + + # Add context as system message (if available) + if prompt.strip(): + messages.append({ + "role": "system", + "content": prompt + }) + + # Add user request + messages.append({ + "role": "user", + "content": request + }) + + # API endpoint (adjust according to DeepSeek official documentation) + api_url = "https://api.deepseek.com/v1/chat/completions" + + # Request headers + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}" + } + + # Request payload + data = { + "model": "deepseek-chat", # Adjust based on available models + "messages": messages, + "stream": False, + "temperature": 0.7, + "max_tokens": 2048 + } + + try: + # Send API request + response = requests.post(api_url, headers=headers, json=data, timeout=30) + response.raise_for_status() # Raise exception for bad status codes + + # Parse response + result = response.json() + return result['choices'][0]['message']['content'] + + except requests.exceptions.RequestException as e: + return f"Request failed: {str(e)}" + except KeyError as e: + return f"Failed to parse response: {str(e)}" + except Exception as e: + return f"Error occurred: {str(e)}" \ No newline at end of file diff --git a/robonix/skill/llm/config/.gitkeep b/robonix/skill/llm/config/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/robonix/skill/llm/config/prompt.txt b/robonix/skill/llm/config/prompt.txt new file mode 100644 index 0000000..52994e7 --- /dev/null +++ b/robonix/skill/llm/config/prompt.txt @@ -0,0 +1,36 @@ +You are a spatial localization assistant. I will provide you with spatial data about a room environment and ask you to find specific items. Your task is to locate the requested item and return its world coordinates. + +The spatial data includes: +- Walls with coordinates and dimensions +- Doors with positions and dimensions +- Windows with positions and dimensions +- Bounding boxes (objects) with class names, positions, rotations, and scales +- Robot pose information + +Each object (bounding box) has: +- class_name: the type of object (e.g., 'tv', 'dining_table', 'sofa') +- position_x, position_y, position_z: the object's position coordinates +- Additional attributes like angle_z, scale_x, scale_y, scale_z +- world_center_x, world_center_y, world_center_z: the object's center coordinates in world space + +When I ask you to find an item (e.g., "find the tv" or "locate the dining table"), you should: +1. Search through the bounding boxes for the matching class_name +2. Return the world coordinates (world_center_x, world_center_y, world_center_z) of the found object +3. If multiple objects match the class_name, respond with the most likely object +4. If no object is matched, return None + +Please output strictly in the following JSON format ONLY, sorted by probability: + +```json +{{ + "chair": [world_center_x, world_center_y, world_center_z], + "dining_chair":[world_center_x, world_center_y, world_center_z], +}} +``` +or + +```json +None +``` + +Ready for your item search requests. \ No newline at end of file diff --git a/robonix/skill/llm/description.yml b/robonix/skill/llm/description.yml new file mode 100644 index 0000000..d37ae54 --- /dev/null +++ b/robonix/skill/llm/description.yml @@ -0,0 +1,6 @@ +name: "llm" +version: "1.0.0" +author: "syswonder" +start_on_boot: true +startup_command: "bash run.sh" +description: "send request to llm" diff --git a/robonix/skill/llm/run.sh b/robonix/skill/llm/run.sh new file mode 100644 index 0000000..e69de29 diff --git a/robonix/skill/move/api/api.py b/robonix/skill/move/api/api.py index d46306a..a5c3824 100644 --- a/robonix/skill/move/api/api.py +++ b/robonix/skill/move/api/api.py @@ -52,7 +52,11 @@ def move_to_ab_pos(self_entity, x, y, yaw) -> str: 移动操作的结果状态字符串 """ #TODO how read dep - return set_goal(self_entity, x,y,yaw) + DEBUG_FLAG_USE_ACTION = True + if DEBUG_FLAG_USE_ACTION: + # use the uapi runtime entity to call the binded skill and capability dynamically + self_entity.cap_set_goal(x=x, y=y, yaw=yaw) + return "success" @eaios.api @eaios.caller diff --git a/robonix/uapi/specs/skill_specs.py b/robonix/uapi/specs/skill_specs.py index c018ac9..edb7777 100644 --- a/robonix/uapi/specs/skill_specs.py +++ b/robonix/uapi/specs/skill_specs.py @@ -228,4 +228,11 @@ "output": EOS_TYPE_SpatialLM_WorldResult, "dependencies": ["cap_get_pose"], }, + "skl_ask_llm": { + "description": "Ask the LLM a question", + "type": EOS_SkillType.SKILL, + "input": {"api_key": str, "prompt": Optional[str], "request": str}, + "output": str, + "dependencies": [], + }, }