| rte_mldev.h(3) | Library Functions Manual | rte_mldev.h(3) |
rte_mldev.h
#include <rte_common.h>
#include <rte_log.h>
#include <rte_mempool.h>
struct rte_ml_dev_info
struct rte_ml_dev_config
struct rte_ml_dev_qp_conf
struct rte_ml_buff_seg
struct rte_ml_op
struct rte_ml_op_error
struct rte_ml_dev_stats
struct rte_ml_dev_xstats_map
struct rte_ml_model_params
struct rte_ml_io_info
struct rte_ml_model_info
#define RTE_ML_STR_MAX 128
typedef void(* rte_ml_dev_stop_flush_t) (int16_t
dev_id, uint16_t qp_id, struct rte_ml_op *op)
enum rte_ml_op_status { RTE_ML_OP_STATUS_SUCCESS =
0, RTE_ML_OP_STATUS_NOT_PROCESSED, RTE_ML_OP_STATUS_ERROR }
enum rte_ml_dev_xstats_mode { RTE_ML_DEV_XSTATS_DEVICE,
RTE_ML_DEV_XSTATS_MODEL }
enum rte_ml_io_type { RTE_ML_IO_TYPE_UNKNOWN = 0,
RTE_ML_IO_TYPE_INT8, RTE_ML_IO_TYPE_UINT8,
RTE_ML_IO_TYPE_INT16, RTE_ML_IO_TYPE_UINT16,
RTE_ML_IO_TYPE_INT32, RTE_ML_IO_TYPE_UINT32,
RTE_ML_IO_TYPE_INT64, RTE_ML_IO_TYPE_UINT64,
RTE_ML_IO_TYPE_FP8, RTE_ML_IO_TYPE_FP16,
RTE_ML_IO_TYPE_FP32, RTE_ML_IO_TYPE_BFLOAT16 }
enum rte_ml_io_layout { RTE_ML_IO_LAYOUT_PACKED,
RTE_ML_IO_LAYOUT_SPLIT }
__rte_experimental int rte_ml_dev_init (size_t dev_max)
__rte_experimental uint16_t rte_ml_dev_count (void)
__rte_experimental int rte_ml_dev_is_valid_dev (int16_t dev_id)
__rte_experimental int rte_ml_dev_socket_id (int16_t dev_id)
__rte_experimental int rte_ml_dev_info_get (int16_t dev_id, struct
rte_ml_dev_info *dev_info)
__rte_experimental int rte_ml_dev_configure (int16_t dev_id, const
struct rte_ml_dev_config *config)
__rte_experimental uint16_t rte_ml_dev_queue_pair_count (int16_t
dev_id)
__rte_experimental int rte_ml_dev_queue_pair_setup (int16_t dev_id,
uint16_t queue_pair_id, const struct rte_ml_dev_qp_conf *qp_conf, int
socket_id)
__rte_experimental int rte_ml_dev_start (int16_t dev_id)
__rte_experimental int rte_ml_dev_stop (int16_t dev_id)
__rte_experimental int rte_ml_dev_close (int16_t dev_id)
__rte_experimental uint16_t rte_ml_enqueue_burst (int16_t dev_id,
uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops)
__rte_experimental uint16_t rte_ml_dequeue_burst (int16_t dev_id,
uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops)
__rte_experimental int rte_ml_op_error_get (int16_t dev_id, struct
rte_ml_op *op, struct rte_ml_op_error *error)
__rte_experimental int rte_ml_dev_stats_get (int16_t dev_id, struct
rte_ml_dev_stats *stats)
__rte_experimental void rte_ml_dev_stats_reset (int16_t dev_id)
__rte_experimental int rte_ml_dev_xstats_names_get (int16_t dev_id,
enum rte_ml_dev_xstats_mode mode, int32_t model_id, struct
rte_ml_dev_xstats_map *xstats_map, uint32_t size)
__rte_experimental int rte_ml_dev_xstats_by_name_get (int16_t dev_id,
const char *name, uint16_t *stat_id, uint64_t *value)
__rte_experimental int rte_ml_dev_xstats_get (int16_t dev_id, enum
rte_ml_dev_xstats_mode mode, int32_t model_id, const uint16_t
stat_ids[], uint64_t values[], uint16_t nb_ids)
__rte_experimental int rte_ml_dev_xstats_reset (int16_t dev_id, enum
rte_ml_dev_xstats_mode mode, int32_t model_id, const uint16_t
stat_ids[], uint16_t nb_ids)
__rte_experimental int rte_ml_dev_dump (int16_t dev_id, FILE *fd)
__rte_experimental int rte_ml_dev_selftest (int16_t dev_id)
__rte_experimental int rte_ml_model_load (int16_t dev_id, struct
rte_ml_model_params *params, uint16_t *model_id)
__rte_experimental int rte_ml_model_unload (int16_t dev_id, uint16_t
model_id)
__rte_experimental int rte_ml_model_start (int16_t dev_id, uint16_t
model_id)
__rte_experimental int rte_ml_model_stop (int16_t dev_id, uint16_t
model_id)
__rte_experimental int rte_ml_model_info_get (int16_t dev_id, uint16_t
model_id, struct rte_ml_model_info *model_info)
__rte_experimental int rte_ml_model_params_update (int16_t dev_id,
uint16_t model_id, void *buffer)
__rte_experimental int rte_ml_io_float32_to_int8 (const void *fp32,
void *i8, uint64_t nb_elements, float scale, int8_t zero_point)
__rte_experimental int rte_ml_io_int8_to_float32 (const void *i8, void
*fp32, uint64_t nb_elements, float scale, int8_t zero_point)
__rte_experimental int rte_ml_io_float32_to_uint8 (const void *fp32,
void *ui8, uint64_t nb_elements, float scale, uint8_t zero_point)
__rte_experimental int rte_ml_io_uint8_to_float32 (const void *ui8,
void *fp32, uint64_t nb_elements, float scale, uint8_t zero_point)
__rte_experimental int rte_ml_io_float32_to_int16 (const void *fp32,
void *i16, uint64_t nb_elements, float scale, int16_t zero_point)
__rte_experimental int rte_ml_io_int16_to_float32 (const void *i16,
void *fp32, uint64_t nb_elements, float scale, int16_t zero_point)
__rte_experimental int rte_ml_io_float32_to_uint16 (const void *fp32,
void *ui16, uint64_t nb_elements, float scale, uint16_t zero_point)
__rte_experimental int rte_ml_io_uint16_to_float32 (const void *ui16,
void *fp32, uint64_t nb_elements, float scale, uint16_t zero_point)
__rte_experimental int rte_ml_io_float32_to_int32 (const void *fp32,
void *i32, uint64_t nb_elements, float scale, int32_t zero_point)
__rte_experimental int rte_ml_io_int32_to_float32 (const void *i32,
void *fp32, uint64_t nb_elements, float scale, int32_t zero_point)
__rte_experimental int rte_ml_io_float32_to_uint32 (const void *fp32,
void *ui32, uint64_t nb_elements, float scale, uint32_t zero_point)
__rte_experimental int rte_ml_io_uint32_to_float32 (const void *ui32,
void *fp32, uint64_t nb_elements, float scale, uint32_t zero_point)
__rte_experimental int rte_ml_io_float32_to_int64 (const void *fp32,
void *i64, uint64_t nb_elements, float scale, int64_t zero_point)
__rte_experimental int rte_ml_io_int64_to_float32 (const void *i64,
void *fp32, uint64_t nb_elements, float scale, int64_t zero_point)
__rte_experimental int rte_ml_io_float32_to_uint64 (const void *fp32,
void *ui64, uint64_t nb_elements, float scale, uint64_t zero_point)
__rte_experimental int rte_ml_io_uint64_to_float32 (const void *ui64,
void *fp32, uint64_t nb_elements, float scale, uint64_t zero_point)
__rte_experimental int rte_ml_io_float32_to_float16 (const void *fp32,
void *fp16, uint64_t nb_elements)
__rte_experimental int rte_ml_io_float16_to_float32 (const void *fp16,
void *fp32, uint64_t nb_elements)
__rte_experimental int rte_ml_io_float32_to_bfloat16 (const void *fp32,
void *bf16, uint64_t nb_elements)
__rte_experimental int rte_ml_io_bfloat16_to_float32 (const void *bf16,
void *fp32, uint64_t nb_elements)
__rte_experimental int rte_ml_io_quantize (int16_t dev_id, uint16_t
model_id, struct rte_ml_buff_seg **dbuffer, struct
rte_ml_buff_seg **qbuffer)
__rte_experimental int rte_ml_io_dequantize (int16_t dev_id, uint16_t
model_id, struct rte_ml_buff_seg **qbuffer, struct
rte_ml_buff_seg **dbuffer)
__rte_experimental struct rte_mempool * rte_ml_op_pool_create
(const char *name, unsigned int nb_elts, unsigned int cache_size, uint16_t
user_size, int socket_id)
__rte_experimental void rte_ml_op_pool_free (struct rte_mempool
*mempool)
Warning
ML (Machine Learning) device API.
The ML framework is built on the following model:
    +-----------------+               rte_ml_[en|de]queue_burst()
    |                 |                          |
    |     Machine     o------+     +--------+    |
    |     Learning    |      |     | queue  |    |    +------+
    |     Inference   o------+-----o        |<===o===>|Core 0|
    |     Engine      |      |     | pair 0 |         +------+
    |                 o----+ |     +--------+
    |                 |    | |
    +-----------------+    | |     +--------+
             ^             | |     | queue  |         +------+
             |             | +-----o        |<=======>|Core 1|
             |             |       | pair 1 |         +------+
             |             |       +--------+
    +--------+--------+    |
    | +-------------+ |    |       +--------+
    | |   Model 0   | |    |       | queue  |         +------+
    | +-------------+ |    +-------o        |<=======>|Core N|
    | +-------------+ |            | pair N |         +------+
    | |   Model 1   | |            +--------+
    | +-------------+ |
    | +-------------+ |<------> rte_ml_model_load()
    | |   Model ..  | |-------> rte_ml_model_info_get()
    | +-------------+ |<------- rte_ml_model_start()
    |                 |<------- rte_ml_model_stop()
    | +-------------+ |<------- rte_ml_model_params_update()
    | |   Model N   | |<------- rte_ml_model_unload()
    | +-------------+ |
    +-----------------+
ML Device: A hardware or software-based implementation of ML device API for
running inferences using a pre-trained ML model.
ML Model: An ML model is an algorithm trained over a dataset. A model consists of the procedure/algorithm and the data/pattern required to make predictions on live data. Once the model is created and trained outside of the DPDK scope, it can be loaded via rte_ml_model_load() and then started using the rte_ml_model_start() API. The rte_ml_model_params_update() API can be used to update model parameters such as weights and bias without having to unload the model via rte_ml_model_unload().
ML Inference: ML inference is the process of feeding data to the model via the rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to get the calculated outputs/predictions from the started model.
In all functions of the ML device API, the ML device is designated by an integer >= 0 named the device identifier dev_id.
The functions exported by the ML device API to setup a device designated by its device identifier must be invoked in the following order:
- rte_ml_dev_configure()
- rte_ml_dev_queue_pair_setup()
- rte_ml_dev_start()
A model is required to run the inference operations with the user specified
inputs. Application needs to invoke the ML model API in the following order
before queueing inference jobs.
- rte_ml_model_load()
- rte_ml_model_start()
A model can be loaded on a device only after the device has been configured
and can be started or stopped only after a device has been started.
The rte_ml_model_info_get() API is provided to retrieve the information related to the model. The information would include the shape and type of input and output required for the inference.
Data quantization and dequantization is one of the main aspects in ML domain. This involves conversion of input data from a higher precision to a lower precision data type and vice-versa for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input and output buffers holding data for multiple batches.
Two utility APIs, rte_ml_io_input_size_get() and rte_ml_io_output_size_get(), can be used to get the size of quantized and de-quantized multi-batch input and output buffers.
User can optionally update the model parameters with rte_ml_model_params_update() after invoking rte_ml_model_stop() API on a given model ID.
The application can invoke, in any order, the functions exported by the ML API to enqueue inference jobs and dequeue inference response.
If the application wants to change the device configuration (i.e., call rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then application must stop the device using rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API for the given model. The application does not need to call rte_ml_dev_stop() API for any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
Once the device is in the start state after invoking rte_ml_dev_start() API and the model is in start state after invoking rte_ml_model_start() API, then the application can call rte_ml_enqueue_burst() and rte_ml_dequeue_burst() API on the destined device and model ID.
Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
Typical application utilisation of the ML API follows the programming flow described below.
Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD are lock-free functions which assume they are not invoked in parallel on different logical cores on the same target object. For instance, the dequeue function of a poll mode driver cannot be invoked in parallel on two logical cores to operate on the same queue pair. Of course, this function can be invoked in parallel by different logical cores on different queue pairs. It is the responsibility of the user application to enforce this rule.
Definition in file rte_mldev.h.
Maximum length of name string
Definition at line 153 of file rte_mldev.h.
Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op
Definition at line 302 of file rte_mldev.h.
Status of ML operation
Enumerator
Definition at line 404 of file rte_mldev.h.
Selects the component of the mldev to retrieve statistics from.
Enumerator
Definition at line 632 of file rte_mldev.h.
Input and output data types. ML models can operate on reduced precision datatypes to achieve better power efficiency, lower network latency and lower memory footprint. This enum is used to represent the lower precision integer and floating point types used by ML models.
Enumerator
Definition at line 875 of file rte_mldev.h.
ML I/O buffer layout
Enumerator
When I/O segmentation is supported by the device, the packed data can be split into multiple segments. In this case, each segment is expected to be aligned to rte_ml_dev_info::align_size
Same applies to output.
See also
When I/O segmentation is supported, each input can be split into multiple segments. In this case, each segment is expected to be aligned to rte_ml_dev_info::align_size
Same applies to output.
See also
Definition at line 905 of file rte_mldev.h.
Maximum number of devices if rte_ml_dev_init() is not called. Initialize the device array before probing devices. If not called, the first device probed would initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
Parameters
Returns
Get the total number of ML devices that have been successfully initialised.
Returns
Check if the device is in ready state.
Parameters
Returns
Return the NUMA socket to which a device is connected.
Parameters
Returns
Retrieve the information of the device.
Parameters
Returns
Configure an ML device.
This function must be invoked first before any other function in the API.
ML Device can be re-configured, when in a stopped state. Device cannot be re-configured after rte_ml_dev_close() is called.
The caller may use rte_ml_dev_info_get() to get the capability of each resources available for this ML device.
Parameters
Returns
Get the number of queue pairs on a specific ML device.
Parameters
Returns
Set up a queue pair for a device. This should only be called when the device is stopped.
Parameters
Returns
Start an ML device.
The device start step consists of setting the configured features and enabling the ML device to accept inference jobs.
Parameters
Returns
Stop an ML device. A stopped device cannot accept inference jobs. The device can be restarted with a call to rte_ml_dev_start().
Parameters
Returns
Close an ML device. The device cannot be restarted!
Parameters
Returns
Enqueue a burst of ML inferences for processing on an ML device.
The rte_ml_enqueue_burst() function is invoked to place ML inference operations on the queue qp_id of the device designated by its dev_id.
The nb_ops parameter is the number of inferences to process which are supplied in the ops array of rte_ml_op structures.
The rte_ml_enqueue_burst() function returns the number of inferences it actually enqueued for processing. A return value equal to nb_ops means that all operations have been enqueued.
Parameters
Returns
Dequeue a burst of processed ML inferences operations from a queue on the ML device. The dequeued operations are stored in rte_ml_op structures whose pointers are supplied in the ops array.
The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued, which is the number of rte_ml_op data structures effectively supplied into the ops array.
A return value equal to nb_ops indicates that the queue contained at least nb_ops operations, and this is likely to signify that other processed operations remain in the device's output queue. An application implementing a 'retrieve as many processed operations as possible' policy can check for this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than nb_ops is returned.
The rte_ml_dequeue_burst() function does not provide any error notification to avoid the corresponding overhead.
Parameters
Returns
Get PMD specific error information for an ML op.
When an ML operation completes with RTE_ML_OP_STATUS_ERROR as its status, this API allows retrieving PMD-specific error details.
Parameters
Returns
Retrieve the general I/O statistics of a device.
Parameters
Returns
Reset the statistics of a device.
Parameters
Retrieve names of extended statistics of an ML device.
Parameters
Returns
Retrieve the value of a single stat by requesting it by name.
Parameters
Returns
Retrieve extended statistics of an ML device.
Parameters
Returns
Reset the values of the xstats of the selected component in the device.
Parameters
Returns
Dump internal information about dev_id to the FILE* provided in fd.
Parameters
Returns
Trigger the ML device self test.
Parameters
Returns
Load an ML model to the device.
Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
Parameters
Returns
Unload an ML model from the device.
Parameters
Returns
Start an ML model for the given device ID.
Start an ML model to accept inference requests.
Parameters
Returns
Stop an ML model for the given device ID.
Model stop disables the ML model from being used for inference jobs. All inference jobs must have been completed before model stop is attempted.
Parameters
Returns
Get ML model information.
Parameters
Returns
Update the model parameters without unloading model.
Update model parameters such as weights and bias without unloading the model. rte_ml_model_stop() must be called before invoking this API.
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to signed 8-bit integer format (INT8).
Parameters
Returns
Convert a buffer containing numbers in signed 8-bit integer format (INT8) to single precision floating format (float32).
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to unsigned 8-bit integer format (UINT8).
Parameters
Returns
Convert a buffer containing numbers in unsigned 8-bit integer format (UINT8) to single precision floating format (float32).
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to signed 16-bit integer format (INT16).
Parameters
Returns
Convert a buffer containing numbers in signed 16-bit integer format (INT16) to single precision floating format (float32).
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to unsigned 16-bit integer format (UINT16).
Parameters
Returns
Convert a buffer containing numbers in unsigned 16-bit integer format (UINT16) to single precision floating format (float32).
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to signed 32-bit integer format (INT32).
Parameters
Returns
Convert a buffer containing numbers in signed 32-bit integer format (INT32) to single precision floating format (float32).
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to unsigned 32-bit integer format (UINT32).
Parameters
Returns
Convert a buffer containing numbers in unsigned 32-bit integer format (UINT32) to single precision floating format (float32).
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to signed 64-bit integer format (INT64).
Parameters
Returns
Convert a buffer containing numbers in signed 64-bit integer format (INT64) to single precision floating format (float32).
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to unsigned 64-bit integer format (UINT64).
Parameters
Returns
Convert a buffer containing numbers in unsigned 64-bit integer format (UINT64) to single precision floating format (float32).
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to half precision floating point format (FP16).
Parameters
Returns
Convert a buffer containing numbers in half precision floating format (FP16) to single precision floating point format (float32).
Parameters
Returns
Convert a buffer containing numbers in single precision floating format (float32) to brain floating point format (bfloat16).
Parameters
Returns
Convert a buffer containing numbers in brain floating point format (bfloat16) to single precision floating point format (float32).
Parameters
Returns
Quantize input data.
Quantization converts data from a higher precision type to a lower precision type to improve the throughput and efficiency of model execution with minimal loss of accuracy. The types of dequantized data and quantized data are specified by the model.
Parameters
Returns
Dequantize output data.
Dequantization converts data from a lower precision type to a higher precision type. The types of quantized and dequantized data are specified by the model.
Parameters
Returns
Create an ML operation pool
Parameters
Returns
Free an ML operation pool
Parameters
Generated automatically by Doxygen for DPDK from the source code.
| Version 24.11.3 | DPDK |