philsupertramp/game-math
Loading...
Searching...
No Matches
ImageDataSet.h
Go to the documentation of this file.
#pragma once

#include "DataSet.h"

// only include Magick++ if needed
#if MATH_IMAGE_PROCESSING
  #include <Magick++.h>
  #include <Magick++/Image.h>
#endif

#include <algorithm>
#include <cstddef>
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>
12
16class ImageDataSet : public DataSet
17{
18public:
24 ImageDataSet(size_t inputCount, size_t outputCount)
25 : DataSet(inputCount, outputCount) { }
26
31 virtual void PrepareDirectory(const char* filePath) {
34 std::cout << "Moving files..." << std::flush;
35 std::filesystem::remove_all(trainDirectory);
36 std::filesystem::copy(
37 filePath,
39 std::filesystem::copy_options::update_existing | std::filesystem::copy_options::recursive);
40
41 totalCount = 0;
42 size_t classCount = 0;
43 classNames.clear();
44 for(const auto& entry : std::filesystem::directory_iterator(trainDirectory)) {
45 if(entry.is_directory()) {
46 classNames.push_back(entry.path().filename());
47 auto dirIter = std::filesystem::directory_iterator(entry);
48 totalCount += std::count_if(begin(dirIter), end(dirIter), [](auto& elem) { return elem.is_regular_file(); });
49 classCount += 1;
50 }
51 }
52 trainingCount = (int)(totalCount * 0.8);
54 Training.Input = MatrixDS<double>(0, trainingCount, InputCount);
55 Training.Output = MatrixDS<double>(0, trainingCount, OutputCount);
56 Validation.Input = MatrixDS<double>(0, validationCount, InputCount);
57 Validation.Output = MatrixDS<double>(0, validationCount, OutputCount);
58
59 size_t index = 0;
60 for(const auto& entry : std::filesystem::directory_iterator(trainDirectory)) {
61 if(entry.is_directory()) {
62 MatrixDS<bool> elem(false, 1, classCount);
63 elem(0, index) = true;
64 classes.push_back(elem);
65 index += 1;
66 }
67 }
68 std::cout << "done!\nDirectory \"" << trainDirectory << "\" ready." << std::endl;
69 std::cout << "Found " << totalCount << " files belonging to " << classCount << " classes.\nUsing "
70 << validationCount << " files for validation\n"
71 << std::flush;
72 }
73
85 void Cache() {
86#if MATH_IMAGE_PROCESSING
87 std::cout << "Resizing files and saving into memory..." << std::flush;
88 size_t count = 0, i = 0, classCount = 0;
89 size_t trainingIter = 0, validationIter = 0;
90 for(const auto& classDirectory : std::filesystem::directory_iterator(trainDirectory)) {
91 bool hadEntry = false;
92 if(!classDirectory.is_directory() || classCount >= classes.size()) continue;
93 for(const auto& entry : std::filesystem::directory_iterator(classDirectory)) {
94 if(!entry.is_regular_file()) continue;
95
96 hadEntry = true;
97 Magick::Image image;
98 image.verbose(verbose);
99 image.magick("JPG");
100 image.read(std::string(entry.path()));
101 // PZ: we don't scale but resample, to enforce dimension width * height and ignore loss in content
102 image.resample(Magick::Point(imageWidth, imageHeight));
103 Set* target;
104 bool is_validation =
105 (trainingIter >= trainingCount || ((count % (int)totalCount * (validationShare)) == 0 && count != 0 && validationIter < validationCount));
106 target = is_validation ? &Validation : &Training;
107 Magick::PixelData pixelBlob(image, "RGBA", Magick::FloatPixel);
108
109 auto* in = (float*)pixelBlob.data();
110
111 for(i = 0; i < InputCount; i++) { (*target).Input[is_validation ? validationIter : trainingIter][i] = in[i]; }
112 for(i = 0; i < OutputCount; i++) {
113 (*target).Output[is_validation ? validationIter : trainingIter][i] = classes[classCount][0][i];
114 }
115 if(is_validation) {
116 validationIter += 1;
117 } else {
118 trainingIter += 1;
119 }
120 count += 1;
121 }
122 classCount += hadEntry;
123 }
124#endif
125 }
126
127public:
129 size_t imageHeight = 180;
131 size_t imageWidth = 180;
133 double validationShare = 0.2;
135 const char* trainDirectory = "../../resources/image_classification/training/";
137 std::vector<MatrixDS<bool>> classes;
139 std::vector<std::string> classNames;
141 size_t totalCount = 0;
143 size_t trainingCount = 0;
145 size_t validationCount = 0;
146};
Definition: DataSet.h:195
size_t OutputCount
number output elements
Definition: DataSet.h:234
Set Validation
set for validation of training
Definition: DataSet.h:239
size_t InputCount
number input elements
Definition: DataSet.h:232
Set Training
set for training
Definition: DataSet.h:237
bool verbose
use verbose output during fitting
Definition: DataSet.h:252
only include Magick++ if needed
Definition: ImageDataSet.h:17
double validationShare
percentage of validation data
Definition: ImageDataSet.h:133
size_t validationCount
number of validation data records
Definition: ImageDataSet.h:145
size_t trainingCount
number of training data records
Definition: ImageDataSet.h:143
std::vector< MatrixDS< bool > > classes
representation of all classes
Definition: ImageDataSet.h:137
ImageDataSet(size_t inputCount, size_t outputCount)
Definition: ImageDataSet.h:24
std::vector< std::string > classNames
representation of all class names
Definition: ImageDataSet.h:139
size_t totalCount
total number of data records
Definition: ImageDataSet.h:141
size_t imageWidth
desired image width
Definition: ImageDataSet.h:131
virtual void PrepareDirectory(const char *filePath)
Definition: ImageDataSet.h:31
size_t imageHeight
desired image height
Definition: ImageDataSet.h:129
void Cache()
Definition: ImageDataSet.h:85
const char * trainDirectory
target directory for training
Definition: ImageDataSet.h:135
Definition: DataSet.h:18
Matrix< double > Input
input data
Definition: DataSet.h:20
Matrix< double > Output
expected output data
Definition: DataSet.h:22