//
// The following example decoder uses FFmpeg to decode I-frames from MultiSense
// compressed streams. For convenience, OpenCV is used to display the images.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>

extern "C"
{
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
}

#include <opencv2/highgui.hpp>
class Decoder
{
public:
    ///
    /// @brief Decodes compressed video packets into packed cv::Mat images.
    ///        One Decoder instance must be constructed per image stream.
    ///
    /// @param codec        FFmpeg codec id of the incoming stream (e.g. AV_CODEC_ID_H264)
    /// @param pixel_format Desired output pixel format: AV_PIX_FMT_GRAY8 produces a
    ///                     1-byte-per-pixel image, any other value a 3-byte-per-pixel image
    ///
    /// @throws std::runtime_error if the codec cannot be found, or FFmpeg fails to
    ///         allocate/open the decoding context or frame
    ///
    Decoder(const AVCodecID &codec, const AVPixelFormat &pixel_format):
        m_pixel_format(pixel_format),
        m_bytes_per_pixel(pixel_format == AV_PIX_FMT_GRAY8 ? 1 : 3)
    {
        m_codec = avcodec_find_decoder(codec);

        //
        // Bug fix: check the lookup result (m_codec), not the input codec id, which
        // is non-zero for any valid enum value even when no decoder is available
        if (!m_codec)
        {
            throw std::runtime_error("Decoder: unable to find codec");
        }

        m_context = avcodec_alloc_context3(m_codec);
        if (!m_context)
        {
            throw std::runtime_error("Decoder: unable to allocate context");
        }

        if (avcodec_open2(m_context, m_codec, nullptr) < 0)
        {
            //
            // The destructor does not run when a constructor throws, so free the
            // context here to avoid leaking it
            avcodec_free_context(&m_context);
            throw std::runtime_error("Decoder: unable to open codec");
        }

        m_frame = av_frame_alloc();
        if (!m_frame)
        {
            avcodec_free_context(&m_context);
            throw std::runtime_error("Decoder: unable to allocate frame");
        }

        av_init_packet(&m_packet);
    }

    //
    // The Decoder owns raw FFmpeg resources; copying would double-free them
    Decoder(const Decoder&) = delete;
    Decoder& operator=(const Decoder&) = delete;

    ~Decoder()
    {
        avcodec_free_context(&m_context);
        av_frame_free(&m_frame);
        av_packet_unref(&m_packet);

        if (m_sws_context)
        {
            sws_freeContext(m_sws_context);
        }
    }

    ///
    /// @brief Decode a single compressed frame into the configured output format
    ///
    /// @param data Pointer to the start of the compressed bitstream data
    /// @param size Length of the compressed bitstream data in bytes
    ///
    /// @return The decoded image (a deep copy, safe to hold after the next decode
    ///         call), or an empty cv::Mat if decoding failed
    ///
    cv::Mat decode(uint8_t *data, size_t size)
    {
        m_packet.data = data;
        m_packet.size = static_cast<int>(size);

        if (avcodec_send_packet(m_context, &m_packet) < 0)
        {
            std::cerr << "Decoder: error sending packet" << std::endl;
            av_init_packet(&m_packet);
            return {};
        }

        if (avcodec_receive_frame(m_context, m_frame) < 0)
        {
            std::cerr << "Decoder: invalid frame" << std::endl;
            av_init_packet(&m_packet);
            return {};
        }

        //
        // Lazily create the conversion context and output buffer on the first
        // decoded frame, once the stream's dimensions and format are known
        if (!m_sws_context)
        {
            m_sws_context = sws_getContext(m_frame->width,
                                           m_frame->height,
                                           static_cast<AVPixelFormat>(m_frame->format),
                                           m_frame->width,
                                           m_frame->height,
                                           m_pixel_format,
                                           0,
                                           nullptr,
                                           nullptr,
                                           nullptr);
            if (!m_sws_context)
            {
                std::cerr << "Decoder: unable to initialize sws context" << std::endl;
                return {};
            }

            m_packed_image_data.resize(static_cast<size_t>(m_frame->width) *
                                       static_cast<size_t>(m_frame->height) *
                                       m_bytes_per_pixel);
        }

        //
        // Convert encoded frame to desired output format
        int output_stride = m_frame->width * static_cast<int>(m_bytes_per_pixel);
        uint8_t *output_data = m_packed_image_data.data();
        sws_scale(m_sws_context,
                  m_frame->data,
                  m_frame->linesize,
                  0,
                  m_frame->height,
                  &output_data,
                  &output_stride);

        av_init_packet(&m_packet);

        //
        // Wrap the packed buffer without copying, then clone so the returned image
        // owns its pixels and is not invalidated by the next decode call
        const cv::Mat image(m_frame->height,
                            m_frame->width,
                            m_bytes_per_pixel == 3 ? CV_8UC3 : CV_8UC1,
                            m_packed_image_data.data());
        return image.clone();
    }

private:
    AVCodec *m_codec = nullptr;
    AVCodecContext *m_context = nullptr;
    AVFrame *m_frame = nullptr;
    AVPacket m_packet;
    SwsContext *m_sws_context = nullptr;
    AVPixelFormat m_pixel_format = AV_PIX_FMT_BGR24;

    //
    // Packed output buffer reused across decode calls; std::vector replaces the
    // original raw new[]/delete[] ownership (RAII)
    std::vector<uint8_t> m_packed_image_data;
    size_t m_bytes_per_pixel = 3;
};
int main()
{
//
// Note a unique decoder needs to be constructed for each MultiSense image stream which is being decoded
auto color_decoder = std::make_unique<Decoder>(AV_CODEC_ID_H264, AV_PIX_FMT_BGR24);
auto grayscale_decoder std::make_unique<Decoder>(AV_CODEC_ID_H264, AV_PIX_FMT_GRAY8);
//
// Initialize and capture data from the MultiSense, or read data in from file
...
//
// If data is is being received in realtime from a MultiSense, the following code should be moved
// to a image callback
const auto color_image = color_decoder->decode(color_image_data_ptr, color_data_length);
const auto grayscale_image = grayscale_decoder->decode(grayscale_image_data_ptr, grayscale_data_length);
cv::imshow("color", color_image);
cv::waitKey(1);
cv::imshow("grayscale", grayscale_image);
cv::waitKey(1);
return 0;
}