ecvl/master/support__eddl_8h_source.html

 /*
 * ECVL - European Computer Vision Library
 * Version: 1.0.3
 * copyright (c) 2021, Università degli Studi di Modena e Reggio Emilia (UNIMORE), AImageLab
 * Authors:
 *    Costantino Grana (costantino.grana@unimore.it)
 *    Federico Bolelli (federico.bolelli@unimore.it)
 *    Michele Cancilla (michele.cancilla@unimore.it)
 *    Laura Canalini (laura.canalini@unimore.it)
 *    Stefano Allegretti (stefano.allegretti@unimore.it)
 * All rights reserved.
 */

 #ifndef ECVL_SUPPORT_EDDL_H_
 #define ECVL_SUPPORT_EDDL_H_

 #include "ecvl/augmentations.h"
 #include "ecvl/core/filesystem.h"
 #include "ecvl/core/image.h"
 #include "ecvl/dataset_parser.h"

 #include <eddl/apis/eddl.h>

 #include <algorithm>
 #include <condition_variable>
 #include <mutex>
 #include <queue>
 #include <thread>
 #include <tuple>

 namespace ecvl
 {
 // Error helpers. Each macro expands to a complete `throw` statement, trailing ';' included,
 // so it is written at the call site as a bare name without an extra semicolon.

 // Throws std::runtime_error reporting that no augmentation exists for the requested split.
 #define ECVL_ERROR_AUG_DOES_NOT_EXIST throw std::runtime_error(ECVL_ERROR_MSG "Augmentation for this split does not exist");
 // Throws std::runtime_error reporting that the requested number of dataset workers is below one.
 #define ECVL_ERROR_WORKERS_LESS_THAN_ONE throw std::runtime_error(ECVL_ERROR_MSG "Dataset workers must be at least one");

 // Convert an ECVL Image into an EDDL Tensor.
 // `t` is a reference to a pointer, so the callee is able to replace the pointer itself.
 void ImageToTensor(const Image& img, Tensor*& t);

 // Overload of ImageToTensor that takes an additional `offset`.
 // NOTE(review): presumably `offset` selects the position inside an existing tensor `t`
 // where the image is written - confirm against the definition.
 void ImageToTensor(const Image& img, Tensor*& t, const int& offset);

 // Convert an EDDL Tensor into an ECVL Image. The result is written to `img`.
 void TensorToImage(const Tensor* t, Image& img);

 // Convert an EDDL Tensor into an ECVL View of float32 data. The result is written to `v`.
 void TensorToView(const Tensor* t, View<DataType::float32>& v);

 class DatasetAugmentations
 {
     std::vector<std::shared_ptr<Augmentation>> augs_;
 public:
     DatasetAugmentations(const std::vector<std::shared_ptr<Augmentation>>& augs) : augs_(augs) {}

     // This makes a deep copy of the Augmentations
     DatasetAugmentations(const DatasetAugmentations& other)
     {
         for (const auto& a : other.augs_) {
             augs_.emplace_back(a ? a->Clone() : nullptr);
         }
     }

     // Getters: YAGNI

     bool Apply(const int split, Image& img, const Image& gt = Image())
     {
         // check if the augs for split st are provided
         try {
             if (augs_.at(split)) {
                 augs_[split]->Apply(img, gt);
                 return true;
             }
             return false;
         }
         catch (const std::out_of_range) {
             ECVL_ERROR_AUG_DOES_NOT_EXIST
         }
     }

     bool Apply(SplitType st, Image& img, const Image& gt = Image())
     {
         return Apply(+st, img, gt); // Magic + operator
     }

     bool IsEmpty() const
     {
         return augs_.empty();
     }
 };

 // Producers-consumer queue of samples.
 // Each element is a tuple <Sample, image Tensor*, label Tensor*>. Producers block in Push()
 // while the queue holds max_size_ elements; the consumer blocks in Pop() while it is empty.
 // Blocked producers are only woken by Pop() once the length drops below threshold_,
 // or explicitly through FreeLockedOnPush().
 class ProducersConsumerQueue
 {
     std::condition_variable cond_notempty_;
     std::condition_variable cond_notfull_;
     mutable std::mutex mutex_;
     std::queue<std::tuple<Sample, Tensor*, Tensor*>> cpq_;
     // Zero-initialized so that a default-constructed queue never reads indeterminate values.
     // With a capacity of 0, Push() blocks until SetSize() provides a real capacity
     // and the waiting producers are notified.
     unsigned max_size_ = 0;
     unsigned threshold_ = 0;
 public:
     ProducersConsumerQueue() {}
     // Capacity `mxsz`; the wake-up threshold defaults to half of the capacity.
     ProducersConsumerQueue(unsigned mxsz) : max_size_(mxsz), threshold_(max_size_ / 2) {}
     // Capacity `mxsz` with an explicit wake-up threshold.
     ProducersConsumerQueue(unsigned mxsz, unsigned thresh) : max_size_(mxsz), threshold_(thresh) {}

     // Push a sample in the queue. Blocks while the queue length is not below max_size_.
     // The tensor pointers are stored as-is; nothing is copied or deleted here.
     void Push(const Sample& sample, Tensor* const image, Tensor* const label)
     {
         std::unique_lock<std::mutex> lock(mutex_);
         cond_notfull_.wait(lock, [this]() { return cpq_.size() < max_size_; });
         cpq_.push(std::make_tuple(sample, image, label));
         cond_notempty_.notify_one();
     }

     // Wake every thread waiting in Push(). The woken threads re-check the "not full"
     // condition, so they only proceed if room is available at that moment.
     void FreeLockedOnPush()
     {
         cond_notfull_.notify_all();
     }

     // Pop a sample from the queue. Blocks while the queue is empty.
     // Producers are notified only when the remaining length is below threshold_.
     void Pop(Sample& sample, Tensor*& image, Tensor*& label)
     {
         std::unique_lock<std::mutex> lock(mutex_);
         cond_notempty_.wait(lock, [this]() { return !cpq_.empty(); });
         std::tie(sample, image, label) = cpq_.front();
         cpq_.pop();
         if (cpq_.size() < threshold_) {
             cond_notfull_.notify_all();
         }
     }

     // Check if the queue is full.
     bool IsFull() const
     {
         std::unique_lock<std::mutex> lock(mutex_);
         return cpq_.size() >= max_size_;
     }

     // Check if the queue is empty.
     bool IsEmpty() const
     {
         std::unique_lock<std::mutex> lock(mutex_);
         return cpq_.empty();
     }

     // Current number of queued elements.
     size_t Length() const
     {
         std::unique_lock<std::mutex> lock(mutex_);
         return cpq_.size();
     }

     // Set the maximum size of the queue and, optionally, the wake-up threshold
     // (-1 selects max_size / 2). Takes the mutex because Push/Pop/IsFull read
     // these fields under the same lock.
     void SetSize(int max_size, int thresh = -1)
     {
         std::unique_lock<std::mutex> lock(mutex_);
         max_size_ = max_size;
         threshold_ = thresh != -1 ? thresh : max_size / 2;
     }

     // Remove every residual element from the queue.
     // NOTE(review): the queued Tensor pointers are dropped without being deleted here.
     // If the queue is meant to own them this leaks - confirm ownership with the callers.
     void Clear()
     {
         std::unique_lock<std::mutex> lock(mutex_);

         // Remove residual samples
         while (!cpq_.empty()) {
             cpq_.pop();
         }
     }
 };

 // Counters of a single worker thread.
 // counter_ is the index of the sample currently used by the thread;
 // min_ and max_ bound the indices the thread manages, as the interval [min_, max_).
 class ThreadCounters
 {
 public:
     int counter_;
     int min_;
     int max_;

     // The counter starts at the lower bound of the interval.
     ThreadCounters(int min, int max) : ThreadCounters(min, min, max) {}

     // The counter starts at an explicitly given position.
     ThreadCounters(int counter, int min, int max) : counter_(counter), min_(min), max_(max) {}

     // Reset the thread counter to its minimum value.
     void Reset()
     {
         counter_ = min_;
     }
 };

 class DLDataset : public Dataset
 {
     const unsigned processor_count_ = std::thread::hardware_concurrency();
 protected:
     unsigned num_workers_;
     std::vector<std::vector<ThreadCounters>> splits_tc_;
     std::vector<std::thread> producers_;
     bool active_ = false;
     std::mutex active_mutex_;
     static std::default_random_engine re_;
     void InitTC(int split_index);

     void SetTensorsShape()
     {
         switch (task_) {
         case Task::classification:
             tensors_shape_ = std::make_pair<std::vector<int>, std::vector<int>>(
                 { batch_size_, n_channels_, resize_dims_[0], resize_dims_[1] },
                 { batch_size_, vsize(classes_) });
             break;
         case Task::segmentation:
             tensors_shape_ = std::make_pair<std::vector<int>, std::vector<int>>(
                 { batch_size_, n_channels_, resize_dims_[0], resize_dims_[1] },
                 { batch_size_, n_channels_gt_, resize_dims_[0], resize_dims_[1] });
             break;
         }
     }

 public:
     int n_channels_;
     int n_channels_gt_ = -1;
     std::vector<int> resize_dims_;
     int batch_size_;
     std::vector<int> current_batch_;
     ColorType ctype_;
     ColorType ctype_gt_;
     DatasetAugmentations augs_;
     ProducersConsumerQueue queue_;
     std::pair< std::vector<int>, std::vector<int>> tensors_shape_;
     DLDataset(const filesystem::path& filename,
         const int batch_size,
         const DatasetAugmentations& augs,
         const ColorType ctype = ColorType::RGB,
         const ColorType ctype_gt = ColorType::GRAY,
         const unsigned num_workers = 1,
         const double queue_ratio_size = 1.,
         const std::unordered_map<std::string, bool>& drop_last = std::unordered_map<std::string, bool>{},
         bool verify = false) :

         Dataset{ filename, verify },
         batch_size_{ batch_size },
         augs_(augs),
         num_workers_{ std::min(num_workers, processor_count_) },
         ctype_{ ctype },
         ctype_gt_{ ctype_gt },
         queue_{ static_cast<unsigned>(batch_size * queue_ratio_size * std::min(num_workers, processor_count_)) }
     {
         // resize current_batch_ to the number of splits and initialize it with 0
         current_batch_.resize(split_.size(), 0);

         // Initialize n_channels_
         Image tmp = samples_[0].LoadImage(ctype);
         n_channels_ = tmp.Channels();

         if (!split_.empty()) {
             current_split_ = 0;
             // Initialize resize_dims_ after that augmentations on the first image are performed
             if (augs_.IsEmpty()) {
                 std::cout << ECVL_WARNING_MSG << "Augmentations are empty!" << std::endl;
             }
             else {
                 while (!augs_.Apply(current_split_, tmp)) {
                     ++current_split_;
                 }
             }
             auto y = tmp.channels_.find('y');
             auto x = tmp.channels_.find('x');
             assert(y != std::string::npos && x != std::string::npos);
             resize_dims_.insert(resize_dims_.begin(), { tmp.dims_[y],tmp.dims_[x] });

             // Initialize n_channels_gt_ if exists
             if (samples_[0].label_path_ != nullopt) {
                 n_channels_gt_ = samples_[0].LoadImage(ctype_gt_, true).Channels();
             }
         }
         else {
             std::cout << ECVL_WARNING_MSG << "Missing splits in the dataset file." << std::endl;
         }

         // Set drop_last parameter for each split
         if (!drop_last.empty()) {
             if (drop_last.size() == vsize(split_)) {
                 for (int i = 0; i < vsize(split_); ++i) {
                     split_[i].drop_last_ = drop_last.at(split_[i].split_name_);
                 }
             }
             else {
                 std::cout << ECVL_WARNING_MSG << "drop_last is not empty but the provided size is different from the size of the splits. The default value 'false' is set for all the splits" << std::endl;
             }
         }

         // Initialize num_batches, last_batch and the ThreadCounters for each split
         auto s_index = 0;
         splits_tc_ = std::vector<std::vector<ThreadCounters>>(vsize(split_));
         for (auto& s : split_) {
             s.SetNumBatches(batch_size_);
             s.SetLastBatch(batch_size_);

             InitTC(s_index);
             ++s_index;
         }

         SetTensorsShape();
     }

     void ResetBatch(const ecvl::any& split = -1, bool shuffle = false);

     void ResetAllBatches(bool shuffle = false);

     void LoadBatch(Tensor*& images, Tensor*& labels);

     void LoadBatch(Tensor*& images);

     static void SetSplitSeed(unsigned seed) { re_.seed(seed); }

     void SetBatchSize(int bs);

     virtual void ProduceImageLabel(DatasetAugmentations& augs, Sample& elem);

     void ThreadFunc(int thread_index);

     std::tuple<std::vector<Sample>, std::shared_ptr<Tensor>, std::shared_ptr<Tensor>> GetBatch();

     void Start(int split_index = -1);

     void Stop();

     auto GetQueueSize() const { return queue_.Length(); };

     void SetAugmentations(const DatasetAugmentations& da);

     const int GetNumBatches(const ecvl::any& split = -1);

     void ToTensorPlane(const std::vector<int>& label, Tensor*& tensor);

     void SetWorkers(const unsigned num_workers)
     {
         if (num_workers < 0) {
             ECVL_ERROR_WORKERS_LESS_THAN_ONE
         }

         num_workers_ = std::min(num_workers, processor_count_);
         for (int i = 0; i < vsize(split_); ++i) {
             InitTC(i);
         }
     }

     void SetNumChannels(const int n_channels, const int n_channels_gt = 1)
     {
         n_channels_ = n_channels;
         n_channels_gt_ = n_channels_gt;
         SetTensorsShape();
     }
 };

 // Make a grid of images from an EDDL Tensor and return it as an ECVL Image.
 // NOTE(review): `cols` presumably is the number of images per grid row and `normalize`
 // presumably rescales the values before building the grid - confirm against the definition.
 Image MakeGrid(Tensor*& t, int cols = 8, bool normalize = false);

 } // namespace ecvl

 #endif // ECVL_SUPPORT_EDDL_H_
ecvl::ProducersConsumerQueue::ProducersConsumerQueue
ProducersConsumerQueue(unsigned mxsz, unsigned thresh)
Definition: support_eddl.h:162

ecvl::DatasetAugmentations
Dataset Augmentations.
Definition: support_eddl.h:95

ecvl::DLDataset::resize_dims_
std::vector< int > resize_dims_
Dimensions (HxW) to which Dataset images must be resized.
Definition: support_eddl.h:325

ecvl::DLDataset::current_batch_
std::vector< int > current_batch_
Number of batches already loaded for each split.
Definition: support_eddl.h:327

ecvl::Dataset::split_
std::vector< Split > split_
Splits of the Dataset. See Split.
Definition: dataset_parser.h:145

ecvl::Image
Image class.
Definition: image.h:66

ecvl::DLDataset::batch_size_
int batch_size_
Size of each dataset mini batch.
Definition: support_eddl.h:326

ecvl::Task::classification

ecvl::DLDataset::n_channels_
int n_channels_
Number of channels of the images.
Definition: support_eddl.h:323

ecvl::DLDataset::SetWorkers
void SetWorkers(const unsigned num_workers)
Change the number of workers.
Definition: support_eddl.h:523

ecvl::ProducersConsumerQueue::Pop
void Pop(Sample &sample, Tensor *&image, Tensor *&label)
Pop a sample from the queue.
Definition: support_eddl.h:202

ecvl::ThreadCounters
Class representing the thread counters.
Definition: support_eddl.h:271

ecvl::DLDataset::tensors_shape_
std::pair< std::vector< int >, std::vector< int > > tensors_shape_
Shape of sample and label tensors.
Definition: support_eddl.h:332

ecvl::ThreadCounters::max_
int max_
Indices of samples managed by the thread in the interval [min_, max_).
Definition: support_eddl.h:275

ecvl::ProducersConsumerQueue
Class that manages the producers-consumer queue of samples. The queue stores pairs of image and label...
Definition: support_eddl.h:143

ecvl::TensorToImage
void TensorToImage(const Tensor *t, Image &img)
Convert an EDDL Tensor into an ECVL Image.

ecvl::DLDataset::ToTensorPlane
void ToTensorPlane(const std::vector< int > &label, Tensor *&tensor)
Convert the sample labels into a one-hot encoded tensor and copy it to the batch tensor.

ecvl::DLDataset::splits_tc_
std::vector< std::vector< ThreadCounters > > splits_tc_
Each dataset split has its own vector of threads, each of which has its counters: <counter,...
Definition: support_eddl.h:293

ecvl::vsize
int vsize(const std::vector< T > &v)
Definition: image.h:35

ecvl::ProducersConsumerQueue::FreeLockedOnPush
void FreeLockedOnPush()
Free threads locked on a push operation.
Definition: support_eddl.h:188

ecvl::Dataset::classes_
std::vector< std::string > classes_
Vector with all the classes available in the Dataset.
Definition: dataset_parser.h:142

ecvl::DLDataset::n_channels_gt_
int n_channels_gt_
Number of channels of the ground truth images.
Definition: support_eddl.h:324

ecvl::ColorType
ColorType
Enum class representing the ECVL supported color spaces.
Definition: image.h:44

augmentations.h

ecvl::ProducersConsumerQueue::ProducersConsumerQueue
ProducersConsumerQueue()
Definition: support_eddl.h:153

ecvl::DLDataset::SetBatchSize
void SetBatchSize(int bs)
Set a new batch size inside the dataset.

ecvl::DLDataset::ProduceImageLabel
virtual void ProduceImageLabel(DatasetAugmentations &augs, Sample &elem)
Load a sample and its label, and push them to the producers-consumer queue.

dataset_parser.h

ecvl::DLDataset::num_workers_
unsigned num_workers_
Number of parallel workers.
Definition: support_eddl.h:292

ecvl::DLDataset::InitTC
void InitTC(int split_index)
Set which are the indices of the samples managed by each thread.

ecvl::TensorToView
void TensorToView(const Tensor *t, View< DataType::float32 > &v)
Convert an EDDL Tensor into an ECVL View.

ecvl::ImageToTensor
void ImageToTensor(const Image &img, Tensor *&t)
Convert an ECVL Image into an EDDL Tensor.

ecvl::ProducersConsumerQueue::Length
size_t Length() const
Calculate the current size of the queue.
Definition: support_eddl.h:237

ecvl::DLDataset::SetNumChannels
void SetNumChannels(const int n_channels, const int n_channels_gt=1)
Change the number of channels of the Image produced by ECVL and update the internal EDDL tensors shap...
Definition: support_eddl.h:541

image.h

ecvl::DLDataset::active_
bool active_
Whether the threads have already been launched or not.
Definition: support_eddl.h:295

ecvl::DatasetAugmentations::DatasetAugmentations
DatasetAugmentations(const DatasetAugmentations &other)
Definition: support_eddl.h:102

ecvl::DLDataset::active_mutex_
std::mutex active_mutex_
Mutex for active_ variable.
Definition: support_eddl.h:296

ecvl::DLDataset::SetTensorsShape
void SetTensorsShape()
Set internal EDDL tensors shape.
Definition: support_eddl.h:306

ecvl
Definition: any.h:69

ecvl::DLDataset::re_
static std::default_random_engine re_
Engine used for random number generation.
Definition: support_eddl.h:297

ecvl::ProducersConsumerQueue::SetSize
void SetSize(int max_size, int thresh=-1)
Set the maximum size of the queue and, optionally, the threshold below which the production of samples restarts.
Definition: support_eddl.h:248

filesystem.h

ecvl::Dataset::current_split_
int current_split_
Current split from which images are loaded.
Definition: dataset_parser.h:146

ecvl::DLDataset::ctype_
ColorType ctype_
ecvl::ColorType of the Dataset images.
Definition: support_eddl.h:328

ecvl::Dataset::task_
Task task_
Task of the dataset.
Definition: dataset_parser.h:147

ecvl::DatasetAugmentations::DatasetAugmentations
DatasetAugmentations(const std::vector< std::shared_ptr< Augmentation >> &augs)
Definition: support_eddl.h:99

ecvl::ThreadCounters::counter_
int counter_
Index of the sample currently used by the thread.
Definition: support_eddl.h:274

ecvl::DLDataset::GetNumBatches
const int GetNumBatches(const ecvl::any &split=-1)
Get the number of batches of the specified split.

ecvl::ProducersConsumerQueue::ProducersConsumerQueue
ProducersConsumerQueue(unsigned mxsz)
Definition: support_eddl.h:157

ECVL_ERROR_WORKERS_LESS_THAN_ONE
#define ECVL_ERROR_WORKERS_LESS_THAN_ONE
Definition: support_eddl.h:34

ecvl::DLDataset::Stop
void Stop()
Join all the threads.

ecvl::DLDataset::ResetBatch
void ResetBatch(const ecvl::any &split=-1, bool shuffle=false)
Reset the batch counter and optionally shuffle samples indices of the specified split.

ecvl::Sample
Sample image in a dataset.
Definition: dataset_parser.h:58

ecvl::DLDataset::SetAugmentations
void SetAugmentations(const DatasetAugmentations &da)
Set the dataset augmentations.

ecvl::DLDataset::LoadBatch
void LoadBatch(Tensor *&images, Tensor *&labels)
Load a batch into images and labels tensor.

ecvl::DLDataset::DLDataset
DLDataset(const filesystem::path &filename, const int batch_size, const DatasetAugmentations &augs, const ColorType ctype=ColorType::RGB, const ColorType ctype_gt=ColorType::GRAY, const unsigned num_workers=1, const double queue_ratio_size=1., const std::unordered_map< std::string, bool > &drop_last=std::unordered_map< std::string, bool >{}, bool verify=false)
Definition: support_eddl.h:345

ecvl::ColorType::GRAY

ecvl::DatasetAugmentations::Apply
bool Apply(const int split, Image &img, const Image &gt=Image())
Definition: support_eddl.h:111

ecvl::ThreadCounters::ThreadCounters
ThreadCounters(int min, int max)
Definition: support_eddl.h:277

ecvl::DLDataset::ctype_gt_
ColorType ctype_gt_
ecvl::ColorType of the Dataset ground truth images.
Definition: support_eddl.h:329

ecvl::DatasetAugmentations::Apply
bool Apply(SplitType st, Image &img, const Image &gt=Image())
Definition: support_eddl.h:126

ecvl::ProducersConsumerQueue::Clear
void Clear()
Definition: support_eddl.h:254

ecvl::DLDataset::queue_
ProducersConsumerQueue queue_
Producers-consumer queue of the dataset.
Definition: support_eddl.h:331

ecvl::DLDataset::GetQueueSize
auto GetQueueSize() const
Get the current size of the producers-consumer queue of the dataset.
Definition: support_eddl.h:496

ecvl::any
std::experimental::any any
Definition: any.h:71

ECVL_WARNING_MSG
#define ECVL_WARNING_MSG
Definition: standard_errors.h:20

ecvl::ThreadCounters::Reset
void Reset()
Reset the thread counter to its minimum value.
Definition: support_eddl.h:279

ecvl::Dataset
DeepHealth Dataset.
Definition: dataset_parser.h:131

ecvl::ThreadCounters::ThreadCounters
ThreadCounters(int counter, int min, int max)
Definition: support_eddl.h:278

ecvl::DLDataset::augs_
DatasetAugmentations augs_
ecvl::DatasetAugmentations to be applied to the Dataset images (and ground truth if exist) for each s...
Definition: support_eddl.h:330

ecvl::ProducersConsumerQueue::IsFull
bool IsFull() const
Check if the queue is full.
Definition: support_eddl.h:217

ecvl::ProducersConsumerQueue::IsEmpty
bool IsEmpty() const
Check if the queue is empty.
Definition: support_eddl.h:227

ecvl::DLDataset::SetSplitSeed
static void SetSplitSeed(unsigned seed)
Set a fixed seed for the random generated values. Useful to reproduce experiments with same shuffling...
Definition: support_eddl.h:452

ecvl::SplitType
SplitType
Enum class representing the Dataset supported splits.
Definition: dataset_parser.h:40

ecvl::Task::segmentation

ecvl::ProducersConsumerQueue::Push
void Push(const Sample &sample, Tensor *const image, Tensor *const label)
Push a sample in the queue.
Definition: support_eddl.h:172

ecvl::ColorType::RGB

ecvl::DLDataset::Start
void Start(int split_index=-1)
Spawn num_workers threads.

ecvl::Dataset::samples_
std::vector< Sample > samples_
Vector containing all the Dataset samples. See Sample.
Definition: dataset_parser.h:144

ecvl::DLDataset
DeepHealth Deep Learning Dataset.
Definition: support_eddl.h:288

ECVL_ERROR_AUG_DOES_NOT_EXIST
#define ECVL_ERROR_AUG_DOES_NOT_EXIST
Definition: support_eddl.h:33

ecvl::DLDataset::ThreadFunc
void ThreadFunc(int thread_index)
Function called when the threads are spawned.

ecvl::DLDataset::GetBatch
std::tuple< std::vector< Sample >, std::shared_ptr< Tensor >, std::shared_ptr< Tensor > > GetBatch()
Pop batch_size samples from the queue and copy them into EDDL tensors.

ecvl::ThreadCounters::min_
int min_
Definition: support_eddl.h:275

ecvl::MakeGrid
Image MakeGrid(Tensor *&t, int cols=8, bool normalize=false)
Make a grid of images from an EDDL Tensor.

ecvl::DLDataset::ResetAllBatches
void ResetAllBatches(bool shuffle=false)
Reset the batch counter of each split and optionally shuffle samples indices (within each split).

ecvl::DatasetAugmentations::IsEmpty
bool IsEmpty() const
Definition: support_eddl.h:131

ecvl::DLDataset::producers_
std::vector< std::thread > producers_
Vector of threads representing the samples producers.
Definition: support_eddl.h:294