diff --git a/data_preprocessing_template.ipynb b/data_preprocessing_template.ipynb
new file mode 100644
index 0000000000000..120ddbec57118
--- /dev/null
+++ b/data_preprocessing_template.ipynb
@@ -0,0 +1,102 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "\"Open"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WOw8yMd1VlnD"
+      },
+      "source": [
+        "# Data Preprocessing Template"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "NvUGC8QQV6bV"
+      },
+      "source": [
+        "## Importing the libraries"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "wfFEXZC0WS-V"
+      },
+      "source": [
+        "import numpy as np\n",
+        "import matplotlib.pyplot as plt\n",
+        "import pandas as pd\n",
+        "from sklearn.model_selection import train_test_split"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "fhYaZ-ENV_c5"
+      },
+      "source": [
+        "## Importing the dataset"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "aqHTg9bxWT_u"
+      },
+      "source": [
+        "dataset = pd.read_csv('Data.csv')\n",
+        "# X takes every column except the last one; y takes the last column.\n",
+        "X = dataset.iloc[:, :-1].values\n",
+        "y = dataset.iloc[:, -1].values"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "3abSxRqvWEIB"
+      },
+      "source": [
+        "## Splitting the dataset into the Training set and Test set"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "hm48sif-WWsh"
+      },
+      "source": [
+        "# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable across runs.\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file