/** * @file trainingData.h * @author Michael Zbyszynski * @date 2 Feb 2017 * @copyright * Copyright © 2017 Goldsmiths. All rights reserved. * * @ingroup machinelearning */ #ifndef trainingData_h #define trainingData_h #include <vector> #include <string> #include <unordered_map> #include "rapidMix.h" #include "json.h" namespace rapidmix { /** This is used by both NN and KNN models for training */ class trainingData { public: trainingData(); struct element{ uint32_t uniqueId; //MZ: Does this scope of this id need to extend beyond this instantiation? std::vector<double> input; std::vector<double> output; double timeStamp; }; struct phrase { uint32_t uniqueId; std::string label; //TODO: Need to work this with templates std::vector<std::string> columnNames; //equal to the number of inputs std::vector<element> elements; void addElement (const std::vector<double> &input, const std::vector<double> &output) { } }; std::vector<phrase> trainingSet; //TODO: Deleting phrases (last or by label) //Design ideas to make phrase building stateless: //phrase& createNewPhrase(); //addElementToPhrase(); /** Create a new phrase that can be recorded into. Returns phrase id */ uint32_t startRecording(); //FIXME: this should go away. -MZ /** Create new phrase, with a label, that can be recorded into. Returns phrase id */ uint32_t startRecording(const std::string &label); /** Add an element with input and output to the phrase that is recording, or to the default phrase if recording is stopped. Returns phrase id. */ uint32_t addElement(const std::vector<double> &input, const std::vector<double> &output); /** Add an element with just input to the phrase that is recording, or to the default phrase if recording is stopped. Returns phrase id. */ uint32_t addElement(const std::vector<double> &input); void stopRecording(); /** Create a phrase with a single element that has a label and input. Returns phrase id. */ uint32_t recordSingleElement(const std::string &label, const std::vector<double> &input); /** Create a phrase with a single element that has input, and output. Returns phrase id. */ uint32_t recordSingleElement(const std::vector<double> &input, const std::vector<double> &output); /** Create a phrase with a single element that has a label, input, and output. Returns phrase id. */ uint32_t recordSingleElement(const std::string &label, const std::vector<double> &input, const std::vector<double> &output); std::vector<std::string> getColumnNames(); void setColumnNames(const std::vector<std::string> &columnNames); /** Get a JSON representation of the data set in the form of a styled string */ std::string getJSON(); /** Write a JSON version of the training set to specified file path */ void writeJSON(const std::string &filepath); /** populate a data set with string. See getJSON() */ bool putJSON(const std::string &jsonMessage); /** read a JSON file at file path and build a training set from it */ bool readJSON(const std::string &filepath); private: int targetPhrase; uint32_t currentId; //* Returns and increments current id */ uint32_t assignCurrentId(); Json::Value parse2json(); void json2trainingSet(const Json::Value &newTrainingData); }; } #endif