dataset_parser.h
Go to the documentation of this file.
1 /*
2 * ECVL - European Computer Vision Library
3 * Version: 0.3.4
4 * copyright (c) 2021, Università degli Studi di Modena e Reggio Emilia (UNIMORE), AImageLab
5 * Authors:
6 * Costantino Grana (costantino.grana@unimore.it)
7 * Federico Bolelli (federico.bolelli@unimore.it)
8 * Michele Cancilla (michele.cancilla@unimore.it)
9 * Laura Canalini (laura.canalini@unimore.it)
10 * Stefano Allegretti (stefano.allegretti@unimore.it)
11 * All rights reserved.
12 */
13 
14 #ifndef ECVL_DATASET_PARSER_H_
15 #define ECVL_DATASET_PARSER_H_
16 
17 #include "ecvl/core.h"
18 #include "ecvl/core/filesystem.h"
19 #include "ecvl/core/optional.h"
20 
21 #include <iostream>
22 #include <map>
23 #include <vector>
24 #include <regex>
25 
26 #include "yaml-cpp/yaml.h"
27 
28 // Defines a strongly typed enum and lets it be converted to unsigned with just a + in front.
// UNSIGNED_ENUM_CLASS(name, a, b, ...) expands to two declarations, emitted in the scope
// where the macro is used:
//   - `enum class name : unsigned { a, b, ... };`
//   - a constexpr unary `operator+` taking a `name` and returning it cast to `unsigned`.
// Example with the enum declared in this file: `+SplitType::test` is an `unsigned`.
29 #define UNSIGNED_ENUM_CLASS(name, ...) enum class name : unsigned { __VA_ARGS__ };\
30 inline constexpr unsigned operator+ (name const val) { return static_cast<unsigned>(val); }
31 
32 namespace ecvl
33 {
/** @brief Enum class representing the Dataset supported splits.

    Declared through UNSIGNED_ENUM_CLASS, so the underlying type is `unsigned` and a unary
    + converts an enumerator to its value. No explicit values are given, hence
    +SplitType::training == 0, +SplitType::validation == 1 and +SplitType::test == 2.
*/
38 UNSIGNED_ENUM_CLASS(SplitType, training, validation, test)
/** @brief Sample image in a dataset.

    Stores the path(s) of a sample and its original dimensions, and declares LoadImage(),
    which returns the sample as an ecvl::Image.

    NOTE(review): this listing shows nothing between location_ and size_, yet the
    documentation index of this file also lists label_, label_path_ and values_ (all
    wrapped in `optional`). Presumably they are public members of Sample; verify against
    the real header before treating this declaration as complete.
*/
45 class Sample
46 {
47 public:
48  std::vector<filesystem::path> location_; /**< Absolute path of the sample. */
52  std::vector<int> size_; /**< Original x and y dimensions of the sample. */
 /** @brief Returns the sample as an ecvl::Image.

     The definition is not part of this header; presumably the image is read from
     location_ (confirm in the implementation file).

     @param[in] ctype Requested ecvl::ColorType. Defaults to ecvl::ColorType::BGR.
     @param[in] is_gt Defaults to false. NOTE(review): looks like it selects the ground
                truth instead of the sample itself; confirm against the definition.
     @return The resulting ecvl::Image.
 */
63  ecvl::Image LoadImage(ecvl::ColorType ctype = ecvl::ColorType::BGR, const bool& is_gt = false);
64 };
65 
/** @brief Splits of a dataset.

    Holds one vector of ints per split (training, validation, test). The YAML decoder
    declared later in this file fills each vector from the key with the same name.
    NOTE(review): the ints are presumably indices into Dataset::samples_; confirm
    against the dataset parser.
*/
72 class Split
73 {
74 public:
75  std::vector<int> training_; /**< Vector containing samples of training split. */
76  std::vector<int> validation_; /**< Vector containing samples of validation split. */
77  std::vector<int> test_; /**< Vector containing samples of test split. */
78 };
79 
/** @brief DeepHealth Dataset.

    Groups the dataset metadata (name, description, classes, features) with the list of
    its samples, and declares the entry points to build it from a file and to dump it
    to YAML.

    NOTE(review): the documentation index of this file also lists `Split split_`
    ("Splits of the Dataset"), which does not appear in this listing (the numbering
    jumps from 93 to 96). Presumably it is a public member declared after samples_;
    verify against the real header.
*/
86 class Dataset
87 {
88 public:
89  std::string name_ = "DeepHealth dataset"; /**< Name of the Dataset. */
90  std::string description_ = "This is the DeepHealth example dataset!"; /**< Description of the Dataset. */
91  std::vector<std::string> classes_; /**< Vector with all the classes available in the Dataset. */
92  std::vector<std::string> features_; /**< Vector with all the features available in the Dataset. */
93  std::vector<Sample> samples_; /**< Vector containing all the Dataset samples. See Sample. */
 /** @brief Default constructor: keeps the default name_ and description_ and leaves every container empty. */
96  Dataset() {}
97 
 /** @brief Builds a Dataset from a file.

     The definition is not part of this header.

     @param[in] filename Path of the file describing the dataset (presumably a YAML file in
                the DeepHealth Dataset Format, as written by Dump(); confirm).
     @param[in] verify Defaults to false. NOTE(review): presumably enables a check that the
                referenced sample files exist; confirm against the definition.
 */
102  Dataset(const filesystem::path& filename, bool verify = false);
103 
 /** @brief Dump the Dataset into a YAML file following the DeepHealth Dataset Format.

     @param[in] file_path Path of the output file.
 */
111  void Dump(const filesystem::path& file_path);
112 
113  // RegEx which matches URLs. Only declared here: the pattern itself is defined outside this header.
114  static const std::regex url_regex_;
115 
116 private:
117  std::map<std::string, int> features_map_; // string -> int lookup; presumably feature name -> index in features_ (confirm)
118  void DecodeImages(const YAML::Node& node, const filesystem::path& root_path, bool verify); // definition not in this header; presumably fills samples_ from the YAML node (confirm)
119  void FindLabel(Sample& sample, const YAML::Node& n); // definition not in this header; presumably sets the label of `sample` from `n` (confirm)
120 };
121 } // namespace ecvl
122 
124 namespace YAML
125 {
130 template<>
131 struct convert<ecvl::Split>
132 {
133  /*static Node encode(const ecvl::Split& rhs)
134  {
135  Node node;
136  node.push_back(rhs.x);
137  return node;
138  }*/
139 
140  static bool decode(const YAML::Node& node, ecvl::Split& rhs)
141  {
142  if (node["training"].IsDefined()) {
143  rhs.training_ = node["training"].as<std::vector<int>>();
144  }
145  if (node["validation"].IsDefined()) {
146  rhs.validation_ = node["validation"].as<std::vector<int>>();
147  }
148  if (node["test"].IsDefined()) {
149  rhs.test_ = node["test"].as<std::vector<int>>();
150  }
151  return true;
152  }
153 };
154 } // namespace YAML
157 #endif // ECVL_DATASET_PARSER_H_
Image class.
Definition: image.h:72
std::vector< int > training_
Vector containing samples of training split.
void Dump(const filesystem::path &file_path)
Dump the Dataset into a YAML file following the DeepHealth Dataset Format.
optional< std::vector< int > > label_
Vector of sample labels.
std::vector< std::string > classes_
Vector with all the classes available in the Dataset.
std::string description_
Description of the Dataset.
ColorType
Enum class representing the ECVL supported color spaces.
Definition: image.h:50
std::vector< std::string > features_
Vector with all the features available in the Dataset.
std::vector< int > size_
Original x and y dimensions of the sample.
#define UNSIGNED_ENUM_CLASS(name,...)
optional< std::map< int, std::string > > values_
Map (map<feature-index,feature-value>) which stores the features of a sample.
Split split_
Splits of the Dataset. See Split.
optional< filesystem::path > label_path_
Absolute path of sample ground truth.
std::vector< filesystem::path > location_
Absolute path of the sample.
Sample image in a dataset.
Splits of a dataset.
std::experimental::optional< T > optional
Definition: optional.h:72
std::string name_
Name of the Dataset.
std::vector< int > validation_
Vector containing samples of validation split.
std::vector< int > test_
Vector containing samples of test split.
DeepHealth Dataset.
static const std::regex url_regex_
SplitType
Enum class representing the Dataset supported splits.
std::vector< Sample > samples_
Vector containing all the Dataset samples. See Sample.