diff --git a/src/machineLearning/machineLearning.cpp b/src/machineLearning/machineLearning.cpp
index 3b4951d44290230f25a99f8f1818441d8c6d3f4a..2df2526ce62d023e169440d7592840a796643a28 100644
--- a/src/machineLearning/machineLearning.cpp
+++ b/src/machineLearning/machineLearning.cpp
@@ -1,5 +1,5 @@
 /*
- * machineLearning.h
+ * machineLearning.cpp
  * Created by Michael Zbyszynski on 10 Jan 2016
  * Copyright © 2017 Goldsmiths. All rights reserved.
  */
diff --git a/src/machineLearning/trainingData.cpp b/src/machineLearning/trainingData.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1cfb665a6d3f2f9e955be107b82f9e834c458d44
--- /dev/null
+++ b/src/machineLearning/trainingData.cpp
@@ -0,0 +1,185 @@
+/*
+ * trainingData.cpp
+ * Created by Michael Zbyszynski on 2 Feb 2017
+ * Copyright © 2017 Goldsmiths. All rights reserved.
+ */
+
+#include <fstream>
+#include "trainingData.h"
+
+RAPIDMIX_BEGIN_NAMESPACE
+
+// Seeds the set with a catch-all phrase (id 0, label "default"); ids handed out afterwards start at 1.
+trainingData::trainingData ()
+    : targetPhrase(0), // index of the default phrase pushed below
+      currentId(1) {
+    trainingSet.push_back(phrase{0, "default"});
+}
+
+uint32_t trainingData::assignCurrentId() {
+    // Post-increment: hand out the id currently stored, then advance the
+    // counter so that the next caller receives a fresh value.
+    return currentId++;
+}
+
+
+uint32_t trainingData::startRecording() { // Opens a new phrase labelled with its own id, targets it, and returns that id.
+    const uint32_t newId = assignCurrentId(); // fetched up front so the label never reads a phrase that is still being initialised
+    trainingSet.push_back(phrase{ newId, std::to_string(newId) }); //TODO: Is this label helpful? -MZ
+    targetPhrase = int(trainingSet.size() - 1); // elements added from now on land in the phrase just appended
+    return newId;
+}
+
+uint32_t trainingData::startRecording(std::string label) {
+    const uint32_t phraseId = assignCurrentId();
+    // insert() leaves a known label untouched, so every distinct label keeps the index it first received.
+    labels.insert(std::make_pair(label, labels.size()));
+    trainingSet.push_back(phrase{ phraseId, label });
+    targetPhrase = int(trainingSet.size() - 1); // new elements now go to the phrase just appended
+    return phraseId;
+}
+
+uint32_t trainingData::addElement(std::vector<double>input, std::vector<double> output) {
+    // Appends one input/output pair to the phrase selected by targetPhrase (the one being
+    // recorded, or index 0 after stopRecording()) and returns the id assigned to the element.
+    const uint32_t elementId = assignCurrentId();
+    // No clock is wired in yet, so the time stamp is a literal 0.0. NULL must not be used here: it is
+    // a null-pointer constant, which warns when converted to double and is rejected where NULL is nullptr.
+    trainingSet[targetPhrase].elements.push_back(element{ elementId, input, output, 0.0 });
+    return elementId;
+}
+
+
+void trainingData::stopRecording() { // Ends the current recording; later addElement() calls use the phrase at index 0.
+    targetPhrase = 0; //direct elements to default phrase
+}
+
+std::string trainingData::getLabel(int value) {
+    // Reverse lookup: labels maps name -> number, so scan every entry for the requested number.
+    // The scan never stops early, so if several names share a number the last one visited wins.
+    std::string match("not found");
+    for (auto it = labels.begin(); it != labels.end(); ++it) {
+        if (it->second == value) { match = it->first; }
+    }
+    return match;
+}
+
+std::vector<std::string> trainingData::getColumnNames() { // Returns a copy of the column names of the phrase at index targetPhrase.
+    return trainingSet[targetPhrase].columnNames;
+}
+
+void trainingData::setColumnNames(std::vector<std::string> column_names) { // Overwrites the column names of the phrase at index targetPhrase.
+    trainingSet[targetPhrase].columnNames = column_names;
+}
+
+Json::Value trainingData::parse2json() {
+    // Serialises the whole training set. Resulting layout:
+    //   { "metadata":    { "creator": ..., "version": ... },
+    //     "trainingSet": [ [ phraseId, label, [columnName...], [ [ [in...], [out...], timeStamp ]... ] ]... ] }
+    // A list that never receives an entry stays a default-constructed Json::Value.
+    Json::Value header;
+    header["creator"] = "RAPID-MIX API C++";
+    header["version"] = RAPIDMIX_VERSION;
+
+    Json::Value phraseList;
+    for (const phrase &currentPhrase : trainingSet) {
+        Json::Value phraseJSON;
+        phraseJSON.append(currentPhrase.uniqueId);
+        phraseJSON.append(currentPhrase.label);
+
+        Json::Value namesJSON;
+        for (const std::string &name : currentPhrase.columnNames) {
+            namesJSON.append(name);
+        }
+        phraseJSON.append(namesJSON);
+
+        Json::Value elementsJSON;
+        for (const element &sample : currentPhrase.elements) {
+            Json::Value inputJSON;
+            for (const double inputValue : sample.input) {
+                inputJSON.append(inputValue);
+            }
+            Json::Value outputJSON;
+            for (const double outputValue : sample.output) {
+                outputJSON.append(outputValue);
+            }
+
+            // Element layout: [ inputs, outputs, timeStamp ]; the element id is not written out.
+            Json::Value sampleJSON;
+            sampleJSON.append(inputJSON);
+            sampleJSON.append(outputJSON);
+            sampleJSON.append(sample.timeStamp);
+            elementsJSON.append(sampleJSON);
+        }
+        phraseJSON.append(elementsJSON);
+        phraseList.append(phraseJSON);
+    }
+
+    Json::Value document;
+    document["metadata"] = header;
+    document["trainingSet"] = phraseList;
+    return document;
+}
+
+std::string trainingData::getJSON() {
+    // Serialise the current set and return it as jsoncpp's human-readable ("styled") string.
+    return parse2json().toStyledString();
+}
+
+void trainingData::writeJSON(const std::string &filepath) {
+    // Writes the styled JSON form of the training set to `filepath`.
+    // The stream state is never inspected, so a path that cannot be opened goes unreported.
+    const Json::Value document = parse2json();
+    std::ofstream jsonOut(filepath);
+    Json::StyledStreamWriter writer;
+    writer.write(jsonOut, document);
+    // No explicit close(): the ofstream destructor flushes and closes the file on return.
+}
+
+void trainingData::json2trainingSet(const Json::Value &root) {
+    // Replaces trainingSet with the phrases found under root["trainingSet"], each laid out as
+    // [ id, label, [columnName...], [ [ [in...], [out...], timeStamp ]... ] ], then repairs the bookkeeping.
+    trainingSet.clear();
+    uint32_t highestPhraseId = 0;
+    for (const Json::Value& jsonPhrase : root["trainingSet"]) {
+        phrase tempPhrase = { jsonPhrase[0].asUInt(), jsonPhrase[1].asString() };
+        if (tempPhrase.uniqueId > highestPhraseId) { highestPhraseId = tempPhrase.uniqueId; }
+        for (const Json::Value& jsonName : jsonPhrase[2]) { tempPhrase.columnNames.push_back(jsonName.asString()); }
+        for (const Json::Value& jsonElement : jsonPhrase[3]) {
+            element tempElement = {}; // value-initialised so the id is never indeterminate; it is assigned below
+            for (const Json::Value& jsonInput : jsonElement[0]) { tempElement.input.push_back(jsonInput.asDouble()); }
+            for (const Json::Value& jsonOutput : jsonElement[1]) { tempElement.output.push_back(jsonOutput.asDouble()); }
+            tempElement.timeStamp = jsonElement[2].asDouble();
+            tempPhrase.elements.push_back(tempElement);
+        }
+        trainingSet.push_back(tempPhrase);
+    }
+    if (trainingSet.empty()) { trainingSet.push_back(phrase{0, "default"}); } // addElement() indexes the set unchecked
+    targetPhrase = 0; // the previous index may lie beyond the end of the new set
+    if (currentId <= highestPhraseId) { currentId = highestPhraseId + 1; } // ids issued from now on stay clear of the loaded ones
+    for (phrase& loaded : trainingSet) {
+        for (element& sample : loaded.elements) { sample.uniqueId = assignCurrentId(); } // element ids are not stored in the JSON
+    }
+}
+
+bool trainingData::putJSON(const std::string &jsonMessage) {
+    // Rebuilds the training set from jsonMessage; returns false and changes nothing if it does not parse.
+    Json::Value parsedMessage;
+    Json::Reader jsonReader;
+    if (!jsonReader.parse(jsonMessage, parsedMessage)) {
+        return false;
+    }
+    json2trainingSet(parsedMessage);
+    return true;
+}
+
+bool trainingData::readJSON(const std::string &filepath) { // Returns false, set untouched, if the file cannot be opened or parsed.
+    Json::Value root;
+    std::ifstream file(filepath);
+    if (!file.is_open() || !Json::Reader().parse(file, root)) { return false; } // parse() reports syntax errors by return value; `file >> root` throws
+    json2trainingSet(root);
+    return true;
+}
+
+
+RAPIDMIX_END_NAMESPACE
diff --git a/src/machineLearning/trainingData.h b/src/machineLearning/trainingData.h
new file mode 100644
index 0000000000000000000000000000000000000000..de13b550ab2d56e87ae09fde5f5f98a819eb6e42
--- /dev/null
+++ b/src/machineLearning/trainingData.h
@@ -0,0 +1,85 @@
+/*
+ * trainingData.h
+ * Created by Michael Zbyszynski on 2 Feb 2017
+ * Copyright © 2017 Goldsmiths. All rights reserved.
+ */
+
+#ifndef trainingData_h
+#define trainingData_h
+
+#include <vector>
+#include <string>
+#include <unordered_map>
+
+#include "rapidMix.h"
+#include "json.h"
+
+RAPIDMIX_BEGIN_NAMESPACE
+
+/** Stores training examples (elements) grouped into phrases. This is used by both NN and KNN models for training */
+class trainingData {
+
+public:
+    /** Starts the set with a single phrase that has id 0 and the label "default". */
+    trainingData();
+
+    struct element{ // one input/output example
+        uint32_t uniqueId; //MZ: Does this scope of this id need to extend beyond this instantiation?
+        std::vector<double> input;
+        std::vector<double> output;
+        double timeStamp; // saved to and restored from JSON; addElement() currently stores 0 here
+    };
+
+    struct phrase { // a group of elements with its own id, label and column names
+        uint32_t uniqueId;
+        std::string label; //TODO: Need to work this with templates
+        std::vector<std::string> columnNames;
+        std::vector<element> elements;
+    };
+    /** All phrases. The constructor puts the default phrase at index 0; loading JSON replaces the whole vector. */
+    std::vector<phrase> trainingSet;
+
+    /** Create a new phrase that can be recorded into. Returns phrase id */
+    uint32_t startRecording();
+
+    /** Create new phrase, with a label, that can be recorded into. Returns phrase id */
+    uint32_t startRecording(std::string label);
+
+    /** Add an element to the phrase that is recording, or to the default phrase if recording is stopped. Returns the id of the new element. */
+    uint32_t addElement(std::vector<double>input, std::vector<double> output);
+    /** Stop recording: elements added afterwards go to the phrase at index 0 of trainingSet. */
+    void stopRecording();
+    /** Get (as a copy) or replace the column names of the phrase being recorded into (index 0 when stopped). */
+    std::vector<std::string> getColumnNames();
+    void setColumnNames(std::vector<std::string> columnNames);
+
+
+    /** Get a JSON representation of the data set in the form of a styled string */
+    std::string getJSON();
+    /** Write a JSON version of the training set to specified file path */
+    void writeJSON(const std::string &filepath);
+    /** Populate the data set from a JSON string, see getJSON(). Returns true if the string parsed as JSON. */
+    bool putJSON(const std::string &jsonMessage);
+    /** Read a JSON file at file path and build a training set from it */
+    bool readJSON(const std::string &filepath);
+
+
+    /** Maps each label given to startRecording(label) to an int (the size of this map when the label was first added). */
+    std::unordered_map<std::string, int> labels;
+    std::string getLabel(int value); // name stored under `value`, or "not found" when there is none
+
+private:
+    int targetPhrase; // index into trainingSet of the phrase that addElement() appends to
+    uint32_t currentId; // next id to hand out; phrase ids and element ids come from this one counter
+
+    /** Returns and increments current id */
+    uint32_t assignCurrentId();
+    /** Conversion between trainingSet and its jsoncpp (Json::Value) form. */
+    Json::Value parse2json();
+    void json2trainingSet(const Json::Value &newTrainingData);
+
+};
+
+RAPIDMIX_END_NAMESPACE
+
+#endif