diff --git a/algorithms/k_means.py b/algorithms/k_means.py
new file mode 100644
index 0000000..d83a21f
--- /dev/null
+++ b/algorithms/k_means.py
@@ -0,0 +1,53 @@
+"""
+K-means implementation
+
+Seems okay: I have not tested this rigorously, but it separates 'Iris-setosa'
+pretty well from 'Iris-versicolor' and 'Iris-virginica',
+though it mixes the latter two.
+
+Dataset: Iris dataset.
+"""
+
+from load_data import load_iris
+import numpy as np
+
+
+X_std, y = load_iris(std=True)
+
+
+def k_means(X, num_means=3, num_iterations=10):
+    """K-means. Assumes each datapoint is a 1D array."""
+    # data dimensions
+    N, D = X.shape
+
+    # initialise variables
+    assignments = np.zeros(N, dtype=int)
+    dists = np.zeros((N, num_means))
+
+    # 1. Initialise means from randomly chosen datapoints (rather than
+    # uniform noise) so that no cluster starts out empty
+    means = X[np.random.choice(N, num_means, replace=False)]
+
+    # 2. Iterate
+    for i in range(num_iterations):
+        # 2a(i). Calculate squared distances to each mean
+        # (broadcasting replaces the original np.tile)
+        for k in range(num_means):
+            dists[:, k] = np.sum((X - means[k]) ** 2, axis=1)
+
+        # 2a(ii). Assign each point to its nearest mean
+        assignments = np.argmin(dists, axis=1)
+
+        # 2b. Recalculate cluster means, keeping the old mean if a
+        # cluster is empty (np.mean of no points would give nan)
+        for k in range(num_means):
+            members = X[assignments == k]
+            if len(members) > 0:
+                means[k] = members.mean(axis=0)
+
+    return means, assignments
+
+
+means, assignments = k_means(X_std)
+
+print(assignments, y)
diff --git a/algorithms/load_data.py b/algorithms/load_data.py
new file mode 100644
index 0000000..eef5a9a
--- /dev/null
+++ b/algorithms/load_data.py
@@ -0,0 +1,20 @@
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+
+def load_iris(std=False):
+    df = pd.read_csv(
+        filepath_or_buffer='https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
+        header=None,
+        sep=',')
+
+    df.columns = ['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']
+    df.dropna(how="all", inplace=True)  # drops the empty line at file-end
+
+    # .ix is deprecated in recent pandas; .iloc does positional indexing
+    X = df.iloc[:, 0:4].values
+    y = df.iloc[:, 4].values
+
+    if std:
+        X = StandardScaler().fit_transform(X)
+
+    return X, y
diff --git a/algorithms/pca.py b/algorithms/pca.py
new file mode 100644
index 0000000..2c629c8
--- /dev/null
+++ b/algorithms/pca.py
@@ -0,0 +1,53 @@
+"""
+Principal Components Analysis (PCA) using NumPy
+
+Dataset: Iris dataset.
+Adapted from the Plotly PCA tutorial:
+https://plot.ly/ipython-notebooks/principal-component-analysis/
+"""
+
+from load_data import load_iris
+import numpy as np
+
+
+X_std, y = load_iris(std=True)
+
+
+def pca(X_std):
+
+    # 1. Calculate the covariance matrix
+    mean_vec = np.mean(X_std, axis=0)
+    N = X_std.shape[0]
+    # same as np.cov(X_std.T)
+    cov_mat = (X_std - mean_vec).T.dot((X_std - mean_vec)) / (N - 1)
+
+    # 2. Find eigenvectors and eigenvalues by SVD
+    # (X_std is standardised, so its columns are zero-mean, which is what
+    # makes the SVD of X_std.T equivalent to eigendecomposing cov_mat)
+    u, s, v = np.linalg.svd(X_std.T)
+
+    eig_vals = s ** 2 / (N - 1)
+    eig_vecs = u
+
+    # Could also eigendecompose directly, but that is less efficient:
+    # O(N^3) vs O(min(M, N) * M * N) for SVD. The same applies to
+    # cor_mat1 = np.corrcoef(X_std.T):
+    # eig_vals, eig_vecs = np.linalg.eig(cov_mat)
+
+    # 3. Select principal components
+    # Make a list of (eigenvalue, eigenvector) tuples
+    eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))]
+
+    # Sort the (eigenvalue, eigenvector) tuples from high to low.
+    # Sort on the eigenvalue alone: plain tuple sorting would compare the
+    # eigenvector arrays on ties, which raises an error.
+    eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
+
+    # Visually confirm that the list is sorted by decreasing eigenvalue
+    print('Eigenvalues and eigenvectors, in descending order of eigenvalues:')
+    for pair in eig_pairs:
+        print(pair)
+    return eig_pairs
+
+
+eig_pairs = pca(X_std)
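+
+
+# Illustrative sketch (my addition, not from the Plotly tutorial): project the
+# data onto the top k principal components found above. `project` is a
+# hypothetical helper name.
+def project(X, eig_pairs, k=2):
+    # Stack the top-k eigenvectors into a D x k projection matrix W
+    W = np.hstack([eig_pairs[i][1].reshape(-1, 1) for i in range(k)])
+    return X.dot(W)
+
+# e.g. X_proj = project(X_std, eig_pairs, k=2) gives N x 2 coordinates to plot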
diff --git a/algorithms/travelling_salesman.py b/algorithms/travelling_salesman.py
new file mode 100644
index 0000000..e7f0b47
--- /dev/null
+++ b/algorithms/travelling_salesman.py
@@ -0,0 +1,71 @@
+"""
+Travelling Salesman Problem:
+An Exact Dynamic Programming Solution in Python
+
+Jessica Yung
+Dec 2018
+"""
+import numpy as np
+
+
+class TravellingSalesman:
+
+    def __init__(self, graph, start=0):
+        """Initialise with the graph and the node you start from.
+        :param graph: takes the form of an adjacency matrix
+            (suitable since we are given a fully connected graph).
+        :param start: an int (index into the adjacency matrix).
+            Which node you start from makes no difference since this is a tour.
+        """
+        self.graph = graph
+        self.start = start
+        self.nodes = list(np.arange(len(graph)))
+        self.cost_dict = {}
+
+    def cost(self, nodes, end):
+        """Memoised cost of visiting `nodes` and ending at `end`."""
+        if (tuple(nodes), end) not in self.cost_dict:
+            self.cost_dict[tuple(nodes), end] = self.calc_cost(nodes, end)
+        return self.cost_dict[tuple(nodes), end]
+
+    def calc_cost(self, nodes, end):
+        if end not in nodes:
+            raise ValueError("Endpoint not in nodes to visit.")
+        if len(nodes) == 1:
+            return 0
+        if len(nodes) == 2:
+            return self.graph[nodes[0], nodes[1]]
+        # Recurrence: the best path through `nodes` ending at `end` extends
+        # the best path through the remaining nodes ending at some j
+        # by the edge (j, end).
+        non_end_nodes = nodes.copy()
+        non_end_nodes.remove(end)
+        candidates = [self.cost(non_end_nodes, j) + self.graph[j, end]
+                      for j in non_end_nodes if j != self.start]
+        return min(candidates)
+
+    def dp(self):
+        """Dynamic programming solution to the Travelling Salesman Problem:
+        visit every node once and return to the start."""
+        return min(self.cost(self.nodes, i) + self.graph[i, self.start]
+                   for i in self.nodes if i != self.start)
+
+
+# Test case:
+def create_adj_matrix(distances):
+    """Build an n x n adjacency matrix from an upper-triangular list of lists
+    with n*(n-1)/2 entries: distances from node 0 to nodes 1, ..., n-1, then
+    from node 1 to nodes 2, ..., n-1, and so on.
+    """
+    n = len(distances) + 1
+    mat = np.diag(np.ones(n) * np.inf)
+    for i in range(n - 1):
+        for j in range(n - i - 1):
+            mat[i, j + i + 1] = mat[j + i + 1, i] = distances[i][j]
+    return mat
+
+
+dists = create_adj_matrix([[4, 3], [2]])
+ts = TravellingSalesman(dists, 0)
+soln = ts.dp()
+print("Min dist:", soln)
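+
+
+# Sanity check (my addition, not in the original file): brute force over all
+# tours with itertools.permutations. O(n!), so only feasible for tiny graphs
+# like the test case above.
+from itertools import permutations
+
+def brute_force(graph, start=0):
+    n = len(graph)
+    others = [i for i in range(n) if i != start]
+    best = np.inf
+    for perm in permutations(others):
+        order = [start] + list(perm) + [start]
+        best = min(best, sum(graph[order[i], order[i + 1]] for i in range(n)))
+    return best
+
+assert brute_force(dists) == soln  # should agree with the DP answer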
diff --git a/flashcards/python-knowledge.html b/flashcards/python-knowledge.html
new file mode 100644
index 0000000..6831974
--- /dev/null
+++ b/flashcards/python-knowledge.html
@@ -0,0 +1,28 @@
+What does 'first class object' mean?; It can be assigned to a variable, passed as an argument and returned from a function, i.e. treated like any other object.
+
+Examples of first-class objects in Python; Functions, classes.
+
+Is Python interpreted or compiled?; Interpreted (source is compiled to bytecode, which the interpreter then executes).
+
+Typing of Python; Dynamically typed (vs statically typed, where you specify types).
+
+Does Python have access specifiers?; No (e.g. C++'s public, private), because 'we are all adults here'.
+
+Compiled vs interpreted languages; - Compiled: the compiled code can be executed directly in the CPU's 'native' language. - Interpreted: must be translated at runtime from the original format into CPU machine instructions.
+
+Deep vs shallow copy; - Shallow copy copies reference pointers, so changes to the copy change the original. Faster. - Deep copy copies values but not reference pointers; slower.
+
+Deep copy; Stores the copied values separately. - Doesn't copy reference pointers. - Changes made in the copy won't affect the original. - Slower than a shallow copy.
+
+Shallow copy; Copies reference pointers just as it copies values. - Changes made via the copy will also affect the original. - Faster execution (how much faster depends on the size of the data).
+
+Lists vs tuples; Lists are mutable, tuples are not.
+
+How multithreading is done in Python; - Global Interpreter Lock (GIL): makes sure only one thread executes Python bytecode at a time. A thread acquires the GIL, does some work, then passes the GIL to the next thread. - i.e. threads take turns using the same CPU core. - Passing the GIL around adds overhead (slower execution).
+
+Example of ternary operator; 5 if x > 4 else 4
+
+Memory management in Python; Objects live on a private heap managed by the Python memory manager; CPython reclaims them via reference counting plus a cyclic garbage collector.
+
+Sources:
+- https://www.edureka.co/blog/interview-questions/python-interview-questions/
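+
+Illustrative snippet (my addition, not from the source cards) showing the deep vs shallow copy cards in action:
+    import copy
+    a = [[1, 2], [3, 4]]
+    shallow = copy.copy(a)    # new outer list, same inner lists
+    deep = copy.deepcopy(a)   # new outer and inner lists
+    a[0][0] = 99
+    print(shallow[0][0])      # 99: the change shows through the shallow copy
+    print(deep[0][0])         # 1: the deep copy is unaffected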
diff --git a/flashcards/pytorch.txt b/flashcards/pytorch.txt
new file mode 100644
index 0000000..96653c4
--- /dev/null
+++ b/flashcards/pytorch.txt
@@ -0,0 +1,5 @@
+Reshape tensor `x` from 2x4 to 4x2; x = x.view(4, 2)
+Reshape tensor `x` to 5 rows; x = x.view(5, -1)
+What does the -1 in x.view(-1) refer to?; Infer this dimension from the others.
+
+How to set up autodifferentiation; Create the tensor with requires_grad=True: `x = torch.tensor([2., 3.], requires_grad=True)`, then do operations on x. (Note: the torch.Tensor constructor does not accept requires_grad; use the lowercase torch.tensor factory.) Can also set it afterwards via `x.requires_grad = True`.
diff --git a/flashcards/tensorflow.txt b/flashcards/tensorflow.txt
new file mode 100644
index 0000000..d3b855b
--- /dev/null
+++ b/flashcards/tensorflow.txt
@@ -0,0 +1,45 @@
+What is a tf Session?; A connection to the C++ backend, which does the computation.
+
+Why might using NumPy have high overhead?; The cost of switching back to Python after every operation (e.g. after doing a matrix multiplication outside Python). Especially bad when running computations on GPUs or in a distributed manner, where transferring data is expensive. TensorFlow instead defines the entire graph so it can run outside Python: the Python code builds the graph and specifies which parts of it should be run.
+
+
+What does the first dimension of x = tf.placeholder(tf.float32, shape=[None, 784]) correspond to?; Batch size.
+
+What does it mean when a dimension of shape is None?; It can be of any size.
+
+Is the shape argument to placeholder compulsory?; No, it's optional, but it helps with debugging.
+
+What is the difference between a tf.placeholder and a tf.Variable?; Placeholders are supplied when we ask tf to run a computation and cannot be modified by it (I think). Variables can be modified by the computation.
+
+What are the tf types of model parameters?; Usually tf.Variable, e.g. tf.Variable(tf.zeros([784]))
+
+Do you have to initialise variables before using them in a session?; Yes: sess.run(tf.global_variables_initializer())
+
+
+Matrix multiplication in tf; y = tf.matmul(x, W) + b
+
+Categorical cross-entropy loss; loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=yhat). Then you use e.g. step = tf.train.GradientDescentOptimizer(0.5).minimize(loss), where 0.5 is the learning rate.
+
+Gradient descent step in tf; step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss), then step.run(feed_dict={x: blah, y: blah})
+
+Take an average in tf; tf.reduce_mean(things_to_average)
+
+Can you replace a variable in your computation graph with other input using feed_dict?; Yes, you can replace any tensor in your graph using feed_dict.
+
+Cast booleans to floats; tf.cast(list_of_booleans, tf.float32)
+
+Why should you initialise weights with a small amount of noise?; (1) Symmetry breaking (todo: expand) and (2) to prevent 0 gradients.
+
+When using ReLU neurons, how should you initialise them?; With a slight positive bias (e.g. 0.1) to avoid 'dead neurons'.
+
+How might you initialise weights with a small amount of noise?; tf.Variable(tf.truncated_normal(shape, stddev=0.1))
+
+2D convolution layer in tf; tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
+
+I want a conv layer to compute 32 features for each 5x5 patch, with a 1-channel input (e.g. greyscale). What should the shape of the weights be?; [5, 5, 1, 32], i.e. (patch_dim1, patch_dim2, num_input_channels, num_output_channels)
+
+Specify shape in round or square brackets in tf?; Square brackets, e.g. [5, 5, 1, 32].
+
+How can you turn dropout on during training and off during testing?; Create a placeholder keep_prob = tf.placeholder(tf.float32) and a layer dropout = tf.nn.dropout(prev_layer, keep_prob), then feed the corresponding value when training and testing (keep_prob=1 at test time).
+
+Scaling used in tf.nn.dropout; Outputs are scaled up by 1/keep_prob, so the expected sum is unchanged.
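+
+Putting the cards above together, a minimal sketch of a training step (my addition; assumes the TF 1.x API used throughout these cards):
+    import tensorflow as tf
+    x = tf.placeholder(tf.float32, shape=[None, 784])     # None = any batch size
+    y_true = tf.placeholder(tf.float32, shape=[None, 10])
+    W = tf.Variable(tf.zeros([784, 10]))
+    b = tf.Variable(tf.zeros([10]))
+    logits = tf.matmul(x, W) + b
+    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=logits))
+    step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
+    with tf.Session() as sess:
+        sess.run(tf.global_variables_initializer())
+        # per batch: sess.run(step, feed_dict={x: batch_x, y_true: batch_y})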