资料链接
一键三连~~~~ https://www.bilibili.com/video/BV1MS4y1N7fw?spm_id_from=333.999.0.0 百度云资料: 链接:https://pan.baidu.com/s/1nKMUcyIoE8VljEdpw9-XdQ 提取码:yeap 复制这段内容后打开百度网盘手机App,操作更方便哦
训练后的模型准备
本文的关键是tensorrt文件的部署,再加上车道线各不相同,模型的训练部分会省略。 大家可以根据网络的教程进行自行训练,比如:Bubbliiiing的u-net教程:https://www.bilibili.com/video/BV1rz4y117rR?spm_id_from=333.337.search-card.all.click
在torch中将.pt/.pth文件转换成onnx文件
示例代码:
import numpy as np
import torch, os
from Unet import UNet
Device = torch.device("cuda:0")
unet = UNet(in_channels=3, out_channels=1, init_features=4, WithActivateLast=True, ActivateFunLast=torch.sigmoid).to(
Device)
unet.load_state_dict(torch.load('Output/270nice.pt', map_location=Device))
unet.eval()
dummy_input = torch.randn(1, 3, 128, 128, device=Device)
input_names = ['data']
output_names = ['fc']
torch.onnx.export(unet, dummy_input, 'unet_270nice.onnx', export_params=True, verbose=True, input_names=input_names,
output_names=output_names,opset_version=12)
安装pycuda,torch(可选)
百度云中包含安装包 首先修改./bashrc文件:
export PATH=/usr/local/cuda-10.2/bin:/usr/local/cuda/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
export CUDA_HOME=$CUDA_HOME:/usr/local/cuda
export PATH="$PATH:/usr/src/tensorrt/bin"
torch安装(可选):
参考链接:https://blog.csdn.net/weixin_40011280/article/details/120643540(按理说。。。好像也可以不安装torch)
sudo apt-get install python3-pip libopenblas-base libopenmpi-dev
sudo pip3 install Cython
sudo pip3 install numpy==1.16.6
sudo pip3 install torch-1.7.0-cp36-cp36m-linux_aarch64.whl
sudo apt-get install libjpeg-dev zlib1g-dev libpython3-dev libavcodec-dev libavformat-dev libswscale-dev
sudo python3 setup.py install
安装pycuda
参考链接:https://blog.csdn.net/weixin_44501699/article/details/106470671 ./bashrc修改部分已经在上文完成
tar zxvf pycuda-2019.1.2.tar.gz
cd pycuda-2019.1.2/
python3 configure.py --cuda-root=/usr/local/cuda-10.2
sudo python3 setup.py install
转换模型文件
参考链接: https://blog.csdn.net/m0_51004308/article/details/116152611 ./bashrc修改部分已经在上文完成
trtexec --explicitBatch=1 --onnx=源文件名称.onnx --saveEngine=输出文件名称.trt --int8 --workspace=1 --verbose
运行tensorrt加速推理文件
import pycuda.driver as cuda
import pycuda.autoinit
import cv2
import time
from PIL import Image
import numpy as np
import os
import tensorrt as trt
import multiprocessing as mp
import rospy
from ackermann_msgs.msg import AckermannDrive
TRT_LOGGER = trt.Logger()
engine_file_path = "unet_270nice.trt"
class HostDeviceMem(object):
def __init__(self, host_mem, device_mem):
self.host = host_mem
self.device = device_mem
def __str__(self):
return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
def __repr__(self):
return self.__str__()
def allocate_buffers(engine):
inputs = []
outputs = []
bindings = []
stream = cuda.Stream()
for binding in engine:
size = trt.volume(engine.get_binding_shape(
binding)) * engine.max_batch_size
dtype = trt.nptype(engine.get_binding_dtype(binding))
host_mem = cuda.pagelocked_empty(size, dtype)
device_mem = cuda.mem_alloc(host_mem.nbytes)
bindings.append(int(device_mem))
if engine.binding_is_input(binding):
inputs.append(HostDeviceMem(host_mem, device_mem))
else:
outputs.append(HostDeviceMem(host_mem, device_mem))
return inputs, outputs, bindings, stream
def do_inference_v2(context, bindings, inputs, outputs, stream):
[cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
[cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
stream.synchronize()
return [out.host for out in outputs]
def gstreamer_pipeline(
capture_width=640,
capture_height=480,
display_width=640,
display_height=480,
framerate=30,
flip_method=0,
):
return (
"nvarguscamerasrc ! "
"video/x-raw(memory:NVMM), "
"width=(int)%d, height=(int)%d, "
"format=(string)NV12, framerate=(fraction)%d/1 ! "
"nvvidconv flip-method=%d ! "
"video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
"videoconvert ! "
"video/x-raw, format=(string)BGR ! appsink"
% (
capture_width,
capture_height,
framerate,
flip_method,
display_width,
display_height,
)
)
def ImgRead(ImgQueue):
cam = cv2.VideoCapture("/dev/video0", cv2.CAP_V4L2)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
cam.set(cv2.CAP_PROP_FPS, 30)
print(cam.get(cv2.CAP_PROP_FPS))
print("ImgRead process: pre cam done")
if not cam.isOpened():
print("ImgRead process: Unable to open camera")
else:
print('ImgRead process: Open camera success!')
while True:
ret, Img = cam.read()
if not ret:
break
while not ImgQueue.empty():
try:
ImgQueue.get_nowait()
except:
continue
ImgQueue.put(Img)
cv2.imshow('ImgRead', Img)
cv2.waitKey(1)
def main():
print("start!")
ImgQueue = mp.Queue()
Mps = []
Mps.append(mp.Process(target=ImgRead, args=(ImgQueue,)))
[Mp.start() for Mp in Mps]
rospy.init_node('follow_line', anonymous=True)
ackermann = AckermannDrive()
ackermann_cmd_pub = rospy.Publisher('/tianracer/ackermann_cmd', AckermannDrive, queue_size=5)
print("ProcessImg process: ImgQueue load img")
with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime, runtime.deserialize_cuda_engine(
f.read()) as engine, engine.create_execution_context() as context:
inputs, outputs, bindings, stream = allocate_buffers(engine)
print("ProcessImg process: load done")
while 1:
if not ImgQueue.empty():
image_raw = ImgQueue.get()
else:
continue
Img = cv2.resize(image_raw, (128, 128))
Img = cv2.cvtColor(Img, cv2.COLOR_BGR2RGB)
Img = Img.transpose((2, 0, 1)).astype(np.float32)
Img = Img / 255.0
Img = (Img - 0.46) / 0.10
inputs[0].host = np.ascontiguousarray(Img)
start = time.time()
trt_outputs = do_inference_v2(
context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
trt_outputs = trt_outputs[0].reshape(128, 128)
OutputImg = (trt_outputs * 255).astype(np.uint8)
cropped1 = OutputImg[60:80, :]
cropped2 = OutputImg[80:100, :]
cropped3 = OutputImg[100:120, :]
h, w = cropped1.shape
M1 = cv2.moments(cropped1, False)
M2 = cv2.moments(cropped2, False)
M3 = cv2.moments(cropped3, False)
try:
cx1, cy1 = int(M1['m10'] / M1['m00']), int(M1['m01'] / M1['m00'])
angle1 = -(int(cx1 - w / 2) / int(h - cy1))
cx2, cy2 = int(M2['m10'] / M2['m00']), int(M2['m01'] / M2['m00'])
angle2 = -(int(cx2 - w / 2) / int(h - cy2))
cx3, cy3 = int(M3['m10'] / M3['m00']), int(M3['m01'] / M3['m00'])
angle3 = -(int(cx3 - w / 2) / int(h - cy3))
angle = (0.5*angle1 + 0.3*angle2 + 0.2*angle3)/12
except:
angle = 1
ackermann.speed = 0.95
ackermann.steering_angle = angle
print('ackermann.steering_angle:', ackermann.steering_angle)
if angle > 0.4 or angle < -0.4:
ackermann.speed = 0.80
ackermann_cmd_pub.publish(ackermann)
finish = time.time()
print('process time {} sec'.format(finish - start))
OutputImg[cy1 +60 - 3:cy1 + 60 + 3, cx1 - 3:cx1 + 3] = 125
OutputImg[cy2 + 80 - 3:cy2 + 80+3, cx2 - 3:cx2 + 3]= 125
OutputImg[cy3+100 - 3:cy3+100+ 3, cx3 - 3:cx3 + 3]= 125
cv2.imshow("out", OutputImg)
finish = time.time()
print('process time {} sec'.format(finish - start))
Key = cv2.waitKey(1)
if Key == 'Q' or Key == 'q':
break
[Mp.terminate() for Mp in Mps]
if __name__ == '__main__':
main()
运行喽
首先在tianracer上运行:
roslaunch tianracer_core tianracer_core.launch
直接python3 xxx.py就能跑起来啦
|