/* Web-viewer page text captured along with this file (not part of the source):
   Skip to content
   Snippets Groups Projects
   trainingData.h 3.82 KiB
   Newer Older
*/
/**
 * @file trainingData.h
 * @author Michael Zbyszynski
 * @date 2 Feb 2017
 * @copyright
 * Copyright © 2017 Goldsmiths. All rights reserved.
 *
 * @ingroup machinelearning
 */

#ifndef trainingData_h
#define trainingData_h

#include <vector>
#include <string>
#include <unordered_map>

#include "rapidMix.h"
#include "json.h"

namespace rapidmix {
    
    /** This is used by both NN and KNN models for training */
    class trainingData {
        
    public:
        
        trainingData();
        
        /**
         * A single training example: an input vector, an output vector,
         * an id and a time stamp.
         *
         * Deliberately kept as a plain aggregate (no constructors, no default
         * member initializers): uniqueId and timeStamp hold indeterminate
         * values until something assigns them.
         */
        struct element{
            uint32_t uniqueId; //MZ: Does the scope of this id need to extend beyond this instantiation?
            std::vector<double> input;  ///< input values of this example
            std::vector<double> output; ///< output values of this example
            double timeStamp;           ///< time stamp; units are not specified in this header
        };
        
        /**
         * A labelled group of elements, with optional names for the input columns.
         */
        struct phrase {
            uint32_t uniqueId;
            std::string label; //TODO: Need to work this with templates
            std::vector<std::string> columnNames; //equal to the number of inputs
            std::vector<element> elements;
            
            /**
             * Append a new element holding copies of @p input and @p output
             * to the end of this phrase's elements.
             *
             * A phrase has no access to the id counter owned by trainingData
             * (assignCurrentId() is a private, non-static member there), so the
             * new element's uniqueId is set to 0 and its timeStamp to 0.0
             * rather than being left indeterminate. Callers that need real
             * values must assign them on elements.back() afterwards.
             *
             * @param input  input values for the new element
             * @param output output values for the new element (may be empty)
             */
            void addElement (const std::vector<double> &input, const std::vector<double> &output)
            {
                // Aggregate-initialize in declaration order:
                // uniqueId, input, output, timeStamp.
                elements.push_back(element{0u, input, output, 0.0});
            }
        };
        
        /** The phrases that make up this data set. Public, so it can be read and modified directly. */
        std::vector<phrase> trainingSet;
        
        //TODO: Deleting phrases (last or by label)
        //Design ideas to make phrase building stateless (not implemented yet):
        //phrase& createNewPhrase();
        //addElementToPhrase();
        
        /**
         * Create a new phrase that can be recorded into.
         * @return id of the new phrase
         */
        uint32_t startRecording(); //FIXME: this should go away. -MZ
        
        /**
         * Create a new phrase, with a label, that can be recorded into.
         * @param label label for the new phrase
         * @return id of the new phrase
         */
        uint32_t startRecording(const std::string &label);
        
        /**
         * Add an element with input and output to the phrase that is recording,
         * or to the default phrase if recording is stopped.
         * @param input  input values of the new element
         * @param output output values of the new element
         * @return phrase id
         */
        uint32_t addElement(const std::vector<double> &input, const std::vector<double> &output);
        
        /**
         * Add an element with just input to the phrase that is recording,
         * or to the default phrase if recording is stopped.
         * @param input input values of the new element
         * @return phrase id
         */
        uint32_t addElement(const std::vector<double> &input);
        
        /**
         * Stop recording.
         * NOTE(review): the addElement() documentation implies that elements
         * added after this call go to the default phrase - confirm against the
         * implementation, which is not visible in this header.
         */
        void stopRecording();
        
        /**
         * Create a phrase with a single element that has a label and input.
         * @param label label for the new phrase
         * @param input input values of the single element
         * @return phrase id
         */
        uint32_t recordSingleElement(const std::string &label, const std::vector<double> &input);
        
        /**
         * Create a phrase with a single element that has input and output.
         * @param input  input values of the single element
         * @param output output values of the single element
         * @return phrase id
         */
        uint32_t recordSingleElement(const std::vector<double> &input, const std::vector<double> &output);
        
        /**
         * Create a phrase with a single element that has a label, input, and output.
         * @param label  label for the new phrase
         * @param input  input values of the single element
         * @param output output values of the single element
         * @return phrase id
         */
        uint32_t recordSingleElement(const std::string &label, const std::vector<double> &input, const std::vector<double> &output);
        
        /**
         * Get the column names.
         * NOTE(review): which phrase(s) the names are read from is not visible
         * in this header - confirm against the implementation.
         */
        std::vector<std::string> getColumnNames();
        /**
         * Set the column names.
         * @param columnNames names to store; the phrase struct notes that their
         *        count should equal the number of inputs
         */
        void setColumnNames(const std::vector<std::string> &columnNames);
        
        /** Get a JSON representation of the data set in the form of a styled string */
        std::string getJSON();
        /**
         * Write a JSON version of the training set to the specified file path.
         * @param filepath path of the file to write
         */
        void writeJSON(const std::string &filepath);
        /**
         * Populate a data set from a JSON string. See getJSON().
         * @param jsonMessage JSON text describing the data set
         * @return presumably true on success - TODO confirm against the implementation
         */
        bool putJSON(const std::string &jsonMessage);
        /**
         * Read a JSON file at the given file path and build a training set from it.
         * @param filepath path of the file to read
         * @return presumably true on success - TODO confirm against the implementation
         */
        bool readJSON(const std::string &filepath);
        
    private:
        // NOTE(review): presumably the index in trainingSet of the phrase that
        // new elements are added to - confirm against the implementation.
        int targetPhrase;
        // Id counter read and advanced by assignCurrentId().
        uint32_t currentId;
        
        /** Returns and increments current id */
        uint32_t assignCurrentId();
        
        // NOTE(review): presumably builds the Json::Value behind getJSON() and
        // writeJSON() - confirm against the implementation.
        Json::Value parse2json();
        // NOTE(review): presumably rebuilds trainingSet from parsed JSON for
        // putJSON() and readJSON() - confirm against the implementation.
        void json2trainingSet(const Json::Value &newTrainingData);