Temp2 #4

Open · wants to merge 3 commits into main

1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -10,5 +10,6 @@ repos:
args: [--select, I, --fix]
# Run the linter.
- id: ruff
args: [--fix]
# Run the formatter.
- id: ruff-format
154 changes: 117 additions & 37 deletions spot_tools/examples/test_spot.py
@@ -3,6 +3,7 @@
import io
import os # Import the os package
import time
from copy import copy

import cv2
import numpy as np
@@ -21,7 +22,7 @@
stow_arm,
)
from spot_skills.door_utils import execute_open_door
from spot_skills.grasp_utils import object_grasp
from spot_skills.grasp_utils import object_grasp, object_grasp_YOLO, object_place
from spot_skills.navigation_utils import (
navigate_to_relative_pose,
)
@@ -104,18 +105,20 @@ def _run_gaze_test(spot) -> None:
relative_poses = [
# (resting_point, "Looking nowhere"),
# (relative_up, "Looking up"),
(relative_up, "Looking up"),
(math_helpers.Vec3(x=0, y=5, z=0), "Looking left"),
(math_helpers.Vec3(x=0, y=-5, z=0), "Looking right"),
# (relative_up, "Looking up"),
(math_helpers.Vec3(x=1.76, y=1, z=0.05), "Looking where Travis tells me to"),
(math_helpers.Vec3(x=1.29, y=-0.34, z=-0.54), "Looking right"),
# (math_helpers.Vec3(x=0, y=-0.5, ), "Looking -dy"),
# (math_helpers.Vec3(x=0.5, y=0, ), "Looking dx"),
# (math_helpers.Vec3(x=0.5, y=0, z=0), "Looking dx"),
# (math_helpers.Vec3(x=-0.5, y=0, ), "Looking -dx"),
# (math_helpers.Vec3(x=0, y=0, ), "Looking yaw"),
# (math_helpers.Vec3(x=0, y=0, ), "Looking -yaw"),
]
for relative_pose, msg in relative_poses:
print(msg)
spot.gaze_at_relative_pose(relative_pose)
gaze_at_relative_pose(spot, relative_pose)
input("Press enter when ready to move on")
stow_arm(spot)
input("Press enter when ready to move on")
# time.sleep(0.5)

@@ -177,6 +180,32 @@ def _run_grasp_test(spot) -> None:
pass


def _run_YOLO_grasp_test(spot):
open_gripper(spot)
relative_pose = math_helpers.Vec3(x=1, y=0, z=0)
gaze_at_relative_pose(spot, relative_pose)
time.sleep(0.2)

object_grasp_YOLO(
spot,
image_source="hand_color_image",
user_input=False,
semantic_class="wood block",
grasp_constraint=None,
debug=True,
)

open_gripper(spot)
stow_arm(spot)
close_gripper(spot)

pass


def _run_place_test(spot):
object_place(spot)


def _run_segment_test(spot):
open_gripper(spot)
relative_pose = math_helpers.Vec3(x=1, y=0, z=0)
@@ -237,6 +266,80 @@ def _run_open_door_test(spot, model_path, max_tries=2) -> None:
# # cv2.destroyAllWindows()


def _run_YOLOWorld_test(spot):
open_gripper(spot)
hand_image_request, hand_image = spot.get_image_RGB()
results = spot.yolo_model(hand_image)
object_to_pick_up = "wood block"
# Process results

annotated_hand_image = copy(hand_image)
# Show all of the detections
for r in results:
boxes = r.boxes # Bounding boxes
for box in boxes:
x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
class_id = int(box.cls[0])
confidence = float(box.conf[0])

# Draw bounding box and label
cv2.rectangle(annotated_hand_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
label = f"{r.names[class_id]} {confidence:.2f}"
cv2.putText(
annotated_hand_image,
label,
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 0),
2,
)
# Display or save the annotated image
cv2.imshow("YOLO Output", annotated_hand_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

most_confident = copy(hand_image)
best_box = None
best_confidence = -1.0

for r in results:
boxes = r.boxes
for box in boxes:
class_id = int(box.cls[0])
class_name = r.names[class_id]
confidence = float(box.conf[0])

if class_name == object_to_pick_up and confidence > best_confidence:
best_confidence = confidence
best_box = box

if best_box:
x1, y1, x2, y2 = map(int, best_box.xyxy[0].tolist())
class_id = int(best_box.cls[0])

# Draw bounding box and label
cv2.rectangle(most_confident, (x1, y1), (x2, y2), (0, 255, 0), 2)
label = f"{r.names[class_id]} {best_confidence:.2f}"
cv2.putText(
most_confident,
label,
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 0),
2,
)

# Display or save the annotated image
cv2.imshow("Most Confident Output", most_confident)
cv2.waitKey(0)
cv2.destroyAllWindows()

else:
raise Exception("No objects of semantic class found")


def encode_numpy_image_to_base64(image_np: np.ndarray, format: str = "PNG"):
pil_image = Image.fromarray(image_np)
buffer = io.BytesIO()
@@ -367,54 +470,31 @@ def save_images_constant_rate(spot, object_name, folder_name, rate=1.0):

args = parser.parse_args()

spot = Spot(username=args.username, password=args.password)
spot = Spot(ip=args.ip, username=args.username, password=args.password)
print(spot.id)
# assert False
# spot.set_estop()
# spot.take_lease()
spot.robot.power_on(timeout_sec=20)
spot.robot.time_sync.wait_for_sync()
# spot.stand()
# stow_arm(spot)

yoloworld_model_path = "/home/aaron/spot_tools/data/models/yolov8x-worldv2-door.pt"

# fl_img_response, fl_img = spot.get_image_RGB(view='frontleft_fisheye_image')
# fr_img_response, fr_img = spot.get_image_RGB(view='frontright_fisheye_image')

# spot.get_live_stitched_image()
# # image = spot.get_stitched_image(fl_img_response, fr_img_response)

# image = spot.get_stitched_image(jpeg_quality_percent=100, crop_image=True)
# print(image.shape)
# cv2.imshow("Stitched Image", image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# image = spot.get_stitched_image_RGB(fl_img_response, fr_img_response, crop_image=True)
# image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
# print(image.shape)
# cv2.imshow("Stitched Image", image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# yoloworld_model_path = "/home/aaron/spot_tools/data/models/yolov8x-worldv2-door.pt"

# _run_open_door_test(spot, yoloworld_model_path)
# _run_walking_test(spot)
# _run_gaze_test(spot)
# _run_YOLO_grasp_test(spot)
_run_place_test(spot)
# _run_traj_test(spot)
# _run_grasp_test(spot)
# _run_segment_test(spot)
# _run_YOLOWorld_test(spot)
# spot.pitch_up()
# print(look_for_object(spot, 'bag'))

save_images_constant_rate(
spot,
"test_object",
"/home/aaron/spot_tools/spot_tools/data/object_images",
rate=1,
)

time.sleep(1)

# spot.stand()
# spot.sit()
spot.sit()
# spot.sit()
# spot.safe_power_off()
20 changes: 20 additions & 0 deletions spot_tools/src/language/test_VLM.py
@@ -105,6 +105,26 @@ def test_image_encoding(client):
print(completion.choices[0].message.content)


def test_enforced_output(client):
class CalendarEvent(BaseModel):
name: str
date: str
participants: list[str]

completion = client.beta.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "Extract the event information."},
{
"role": "user",
"content": "Alice and Bob are going to a science fair on Friday.",
},
],
response_format=CalendarEvent,
)

print(completion.choices[0].message.content)


def main():
client = OpenAI(
api_key=os.getenv("OPENAI_API_KEY")
13 changes: 11 additions & 2 deletions spot_tools/src/spot_executor/spot.py
Expand Up @@ -20,6 +20,7 @@
RobotCommandClient,
blocking_stand,
)
from ultralytics import YOLOWorld

from spot_executor.stitch_front_images import stitch, stitch_live, stitch_RGB

@@ -33,10 +34,18 @@ def __init__(
take_lease=True,
set_estop=False,
verbose=False,
semantic_model_path="../data/models/efficientvit_seg_l2.onnx",
semantic_model_path="/home/rrg/dcist_ws/src/awesome_dcist_t4/spot_tools/spot_tools/data/models/efficientvit_seg_l2.onnx",

Collaborator commented:
Can we handle these paths a little more generally? I think we should either 1) Make the path optional (e.g., default to None), and skip loading the yolo model when it's not set. This might be useful if we want to be able to run Spot's non-pick skills without having to load yolo / set the weights, or 2) Make the path not optional. And then either way the hard-coded RRG path can live higher-up (in the test_spot or something?)

Collaborator (author) replied:
I agree the path should be optional, but I'm not sure how best to handle setting the classes for YOLOWorld. Two options: (1) load a file that sets the classes ahead of time in the Spot init function, or (2) set the classes to the object being picked up each time the grasp function is called. If we know which objects we want to classify ahead of time, (1) makes more sense; (2) would incur redundant computation on every grasp call. (A minimal sketch of option (1) follows this hunk.)

yolo_world_path="/home/rrg/dcist_ws/src/awesome_dcist_t4/spot_tools/spot_tools/data/models/yolov8x-worldv2.pt",
debug=False,
semantic_name_to_id=None,
):
# YOLOv8 model for world detection
print("Initializing YOLOWorld model. ")
self.yolo_model = YOLOWorld(yolo_world_path)
custom_classes = ["", "bag", "wood block", "pipe"]
self.yolo_model.set_classes(custom_classes)
print("Set classes for YOLOWorld model.")

self.is_fake = False
self.verbose = verbose
bosdyn.client.util.setup_logging(verbose)
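
A minimal sketch of option (1), assuming the constructor in this hunk: yolo_world_path defaults to None, the YOLOWorld model is only loaded when a path is given, and the optional classes_file (a hypothetical newline-delimited list of class names, not part of this PR) replaces the hard-coded class list.

# Sketch only: option (1) from the review thread above.
from ultralytics import YOLOWorld

class Spot:
    def __init__(self, yolo_world_path=None, classes_file=None, **kwargs):
        # Skip YOLO entirely so non-pick skills never load the weights.
        self.yolo_model = None
        if yolo_world_path is not None:
            print("Initializing YOLOWorld model.")
            self.yolo_model = YOLOWorld(yolo_world_path)
            if classes_file is not None:
                with open(classes_file) as f:
                    classes = [line.strip() for line in f if line.strip()]
            else:
                # Fallback mirrors the classes hard-coded in this PR.
                classes = ["bag", "wood block", "pipe"]
            self.yolo_model.set_classes(classes)
            print("Set classes for YOLOWorld model.")

The hard-coded RRG paths could then move up into test_spot.py, as suggested.
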
@@ -89,7 +98,7 @@ def get_pose(self):
transforms, VISION_FRAME_NAME, BODY_FRAME_NAME
)

return out_tform_body
return np.array([out_tform_body.x, out_tform_body.y, out_tform_body.angle])

def get_image(self, view="hand_color_image", show=False):
self.robot.logger.info("Getting an image from: %s", view)
16 changes: 9 additions & 7 deletions spot_tools/src/spot_executor/spot_executor.py
@@ -11,7 +11,7 @@
from scipy.spatial.transform import Rotation

from spot_skills.arm_utils import gaze_at_vision_pose
from spot_skills.grasp_utils import object_grasp, object_place
from spot_skills.grasp_utils import object_grasp_YOLO, object_place
from spot_skills.navigation_utils import (
follow_trajectory_continuous,
turn_to_point,
@@ -92,12 +92,14 @@ def execute_gaze(self, command, feedback, pick_next=False):

def execute_pick(self, command, feedback):
feedback.print("INFO", "Executing `pick` command")
success = object_grasp(
self.spot_interface,
semantic_class=command.object_class,
labelspace_map=self.labelspace_map,
debug=self.debug,
)
# success = object_grasp(

Collaborator commented:
If we have at least some faith that object_grasp_YOLO works / will work soon, we should delete this commented out part.

Collaborator (author) replied:
I think the command should carry a field like 'command.model_type' that selects the correct grasp function; ideally there would be a single grasp function. (A sketch of that dispatch follows this hunk.)

# self.spot_interface,
# semantic_class=command.object_class,
# labelspace_map=self.labelspace_map,
# debug=self.debug,
# )

success = object_grasp_YOLO(self.spot_interface, command.object_class)

if self.debug:
success, debug_images = success
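
A minimal sketch of the dispatch suggested in the thread above, assuming a hypothetical command.model_type field; object_grasp would need to stay importable alongside object_grasp_YOLO for the fallback branch.

# Sketch only: select the grasp back-end from an assumed command.model_type
# instead of keeping the old call commented out.
from spot_skills.grasp_utils import object_grasp, object_grasp_YOLO

def execute_pick(self, command, feedback):
    feedback.print("INFO", "Executing `pick` command")
    model_type = getattr(command, "model_type", "yolo")  # assumed field
    if model_type == "yolo":
        success = object_grasp_YOLO(self.spot_interface, command.object_class)
    else:
        success = object_grasp(
            self.spot_interface,
            semantic_class=command.object_class,
            labelspace_map=self.labelspace_map,
            debug=self.debug,
        )
    # ... rest of execute_pick (debug handling, feedback) unchanged
    return success
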