14 #ifndef ECVL_SUPPORT_EDDL_H_ 15 #define ECVL_SUPPORT_EDDL_H_ 22 #include <eddl/apis/eddl.h> 25 #include <condition_variable> 33 #define ECVL_ERROR_AUG_DOES_NOT_EXIST throw std::runtime_error(ECVL_ERROR_MSG "Augmentation for this split does not exist"); 34 #define ECVL_ERROR_WORKERS_LESS_THAN_ONE throw std::runtime_error(ECVL_ERROR_MSG "Dataset workers must be at least one"); 57 void ImageToTensor(
const Image& img, Tensor*& t,
const int& offset);
85 void TensorToView(
const Tensor* t, View<DataType::float32>& v);
97 std::vector<std::shared_ptr<Augmentation>> augs_;
104 for (
const auto& a : other.augs_) {
105 augs_.emplace_back(a ? a->Clone() :
nullptr);
115 if (augs_.at(split)) {
116 augs_[split]->Apply(img, gt);
121 catch (
const std::out_of_range) {
128 return Apply(+st, img, gt);
133 return augs_.empty();
145 std::condition_variable cond_notempty_;
146 std::condition_variable cond_notfull_;
147 mutable std::mutex mutex_;
148 std::queue<std::tuple<Sample, Tensor*, Tensor*>> cpq_;
172 void Push(
const Sample& sample, Tensor*
const image, Tensor*
const label)
174 std::unique_lock<std::mutex> lock(mutex_);
175 cond_notfull_.wait(lock, [
this]() {
return cpq_.size() < max_size_; });
176 cpq_.push(std::make_tuple(sample, image, label));
177 cond_notempty_.notify_one();
189 void Pop(
Sample& sample, Tensor*& image, Tensor*& label)
191 std::unique_lock<std::mutex> lock(mutex_);
192 cond_notempty_.wait(lock, [
this]() {
return !cpq_.empty(); });
193 std::tie(sample, image, label) = cpq_.front();
195 if (cpq_.size() < threshold_) {
196 cond_notfull_.notify_all();
206 std::unique_lock<std::mutex> lock(mutex_);
207 return cpq_.size() >= max_size_;
216 std::unique_lock<std::mutex> lock(mutex_);
226 std::unique_lock<std::mutex> lock(mutex_);
237 max_size_ = max_size;
238 threshold_ = thresh != -1 ? thresh : max_size / 2;
243 std::unique_lock<std::mutex> lock(mutex_);
246 while (!cpq_.empty()) {
277 const unsigned processor_count_ = std::thread::hardware_concurrency();
291 static std::default_random_engine
re_;
297 void InitTC(
int split_index);
304 tensors_shape_ = std::make_pair<std::vector<int>, std::vector<int>>(
309 tensors_shape_ = std::make_pair<std::vector<int>, std::vector<int>>(
333 const int batch_size,
337 const unsigned num_workers = 1,
338 const double queue_ratio_size = 1.,
339 const std::vector<bool>& drop_last = {},
340 bool verify =
false) :
345 num_workers_{ std::min(num_workers, processor_count_) },
348 queue_{ static_cast<unsigned>(batch_size * queue_ratio_size * std::min(num_workers, processor_count_)) }
354 Image tmp =
samples_[0].LoadImage(ctype);
368 auto y = tmp.channels_.find(
'y');
369 auto x = tmp.channels_.find(
'x');
370 assert(y != std::string::npos && x != std::string::npos);
374 if (
samples_[0].label_path_ != nullopt) {
379 std::cout <<
ECVL_WARNING_MSG <<
"Missing splits in the dataset file." << std::endl;
383 if (!drop_last.empty()) {
385 for (
int i = 0; i <
vsize(drop_last); ++i) {
386 split_[i].drop_last_ = drop_last[i];
390 std::cout <<
ECVL_WARNING_MSG <<
"drop_last is not empty but the provided size is different from the size of the splits. The default value 'false' is set for all the splits" << std::endl;
427 void LoadBatch(Tensor*& images, Tensor*& labels);
468 std::tuple<std::vector<Sample>, std::shared_ptr<Tensor>, std::shared_ptr<Tensor>>
GetBatch();
474 void Start(
int split_index = -1);
504 void ToTensorPlane(
const std::vector<int>& label, Tensor*& tensor);
512 if (num_workers < 0) {
546 Image
MakeGrid(Tensor*& t,
int cols = 8,
bool normalize =
false);
553 #endif // ECVL_SUPPORT_EDDL_H_ ProducersConsumerQueue(unsigned mxsz, unsigned thresh)
std::vector< int > resize_dims_
Dimensions (HxW) to which Dataset images must be resized.
std::vector< int > current_batch_
Number of batches already loaded for each split.
std::vector< Split > split_
Splits of the Dataset. See Split.
int batch_size_
Size of each dataset mini batch.
int n_channels_
Number of channels of the images.
void SetWorkers(const unsigned num_workers)
Change the number of workers.
void Pop(Sample &sample, Tensor *&image, Tensor *&label)
Pop a sample from the queue.
Class representing the thread counters.
std::pair< std::vector< int >, std::vector< int > > tensors_shape_
Shape of sample and label tensors.
int max_
Indices of samples managed by the thread in the interval [min_, max_).
Class that manages the producers-consumer queue of samples. The queue stores pairs of image and label...
void TensorToImage(const Tensor *t, Image &img)
Convert an EDDL Tensor into an ECVL Image.
void ToTensorPlane(const std::vector< int > &label, Tensor *&tensor)
Convert the sample labels into a one-hot encoded tensor and copy it to the batch tensor.
std::vector< std::vector< ThreadCounters > > splits_tc_
Each dataset split has its own vector of threads, each of which has its counters: <counter,...
int vsize(const std::vector< T > &v)
std::vector< std::string > classes_
Vector with all the classes available in the Dataset.
int n_channels_gt_
Number of channels of the ground truth images.
ColorType
Enum class representing the ECVL supported color spaces.
void SetBatchSize(int bs)
Set a new batch size inside the dataset.
virtual void ProduceImageLabel(DatasetAugmentations &augs, Sample &elem)
Load a sample and its label, and push them to the producers-consumer queue.
unsigned num_workers_
Number of parallel workers.
void InitTC(int split_index)
Set the indices of the samples managed by each thread.
void TensorToView(const Tensor *t, View< DataType::float32 > &v)
Convert an EDDL Tensor into an ECVL View.
void ImageToTensor(const Image &img, Tensor *&t)
Convert an ECVL Image into an EDDL Tensor.
size_t Length() const
Calculate the current size of the queue.
void SetNumChannels(const int n_channels, const int n_channels_gt=1)
Change the number of channels of the Image produced by ECVL and update the internal EDDL tensors shap...
bool active_
Whether the threads have already been launched or not.
DatasetAugmentations(const DatasetAugmentations &other)
std::mutex active_mutex_
Mutex for active_ variable.
void SetTensorsShape()
Set internal EDDL tensors shape.
static std::default_random_engine re_
Engine used for random number generation.
void SetSize(int max_size, int thresh=-1)
Set the maximum size of the queue and, optionally, the threshold below which the production of samples restarts.
int current_split_
Current split from which images are loaded.
ColorType ctype_
ecvl::ColorType of the Dataset images.
Task task_
Task of the dataset.
DatasetAugmentations(const std::vector< std::shared_ptr< Augmentation >> &augs)
int counter_
Index of the sample currently used by the thread.
const int GetNumBatches(const ecvl::any &split=-1)
Get the number of batches of the specified split.
ProducersConsumerQueue(unsigned mxsz)
#define ECVL_ERROR_WORKERS_LESS_THAN_ONE
void Stop()
Join all the threads.
void ResetBatch(const ecvl::any &split=-1, bool shuffle=false)
Reset the batch counter and optionally shuffle samples indices of the specified split.
Sample image in a dataset.
void SetAugmentations(const DatasetAugmentations &da)
Set the dataset augmentations.
void LoadBatch(Tensor *&images, Tensor *&labels)
Load a batch into images and labels tensor.
bool Apply(const int split, Image &img, const Image &gt=Image())
ThreadCounters(int min, int max)
ColorType ctype_gt_
ecvl::ColorType of the Dataset ground truth images.
bool Apply(SplitType st, Image &img, const Image &gt=Image())
ProducersConsumerQueue queue_
Producers-consumer queue of the dataset.
auto GetQueueSize() const
Get the current size of the producers-consumer queue of the dataset.
std::experimental::any any
void Reset()
Reset the thread counter to its minimum value.
ThreadCounters(int counter, int min, int max)
DatasetAugmentations augs_
ecvl::DatasetAugmentations to be applied to the Dataset images (and ground truth, if it exists) for each s...
bool IsFull() const
Check if the queue is full.
bool IsEmpty() const
Check if the queue is empty.
static void SetSplitSeed(unsigned seed)
Set a fixed seed for the randomly generated values. Useful to reproduce experiments with the same shuffling...
DLDataset(const filesystem::path &filename, const int batch_size, const DatasetAugmentations &augs, const ColorType ctype=ColorType::RGB, const ColorType ctype_gt=ColorType::GRAY, const unsigned num_workers=1, const double queue_ratio_size=1., const std::vector< bool > &drop_last={}, bool verify=false)
SplitType
Enum class representing the Dataset supported splits.
void Push(const Sample &sample, Tensor *const image, Tensor *const label)
Push a sample in the queue.
void Start(int split_index=-1)
Spawn num_workers threads.
std::vector< Sample > samples_
Vector containing all the Dataset samples. See Sample.
DeepHealth Deep Learning Dataset.
#define ECVL_ERROR_AUG_DOES_NOT_EXIST
void ThreadFunc(int thread_index)
Function called when the threads are spawned.
std::tuple< std::vector< Sample >, std::shared_ptr< Tensor >, std::shared_ptr< Tensor > > GetBatch()
Pop batch_size samples from the queue and copy them into EDDL tensors.
Image MakeGrid(Tensor *&t, int cols=8, bool normalize=false)
Make a grid of images from an EDDL Tensor.
void ResetAllBatches(bool shuffle=false)
Reset the batch counter of each split and optionally shuffle samples indices (within each split).
std::vector< std::thread > producers_
Vector of threads representing the samples producers.