text 快速浏览Pandas for Data Science

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了text 快速浏览Pandas for Data Science相关的知识,希望对你有一定的参考价值。

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "from https://towardsdatascience.com/quick-dive-into-pandas-for-data-science-cc1c1a80d9c4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating Series from Python List"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "countries = ['USA', 'Brazil', 'Japan', 'France']\n",
    "data = [100, 200, 300, 400]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "USA       100\n",
       "Brazil    200\n",
       "Japan     300\n",
       "France    400\n",
       "dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series(data, index=countries)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating Series from a NumPy Array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([100, 200, 300, 400])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np_arr = np.array(data)\n",
    "np_arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    100\n",
       "1    200\n",
       "2    300\n",
       "3    400\n",
       "dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series(np_arr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating Series from Python Dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'a': 50, 'b': 60, 'c': 30, 'd': 100}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dictionary = {'a': 50, 'b': 60, 'c': 30, 'd': 100}\n",
    "dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a     50\n",
       "b     60\n",
       "c     30\n",
       "d    100\n",
       "dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series(dictionary)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Grabbing information from Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series1 = pd.Series([1, 2, 3, 4], ['London', 'HongKong', 'Mumbai', 'Bangladesh'])\n",
    "series1['London']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Performing Arithmetic operations on Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "series1 = pd.Series([2, 4, 6, 8], ['London', 'HongKong', 'Mumbai', 'Bangladesh'])\n",
    "series2 = pd.Series([1, 3, 5, 7], ['London', 'Sao Paulo', 'Mumbai', 'Delhi'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bangladesh    NaN\n",
       "Delhi         NaN\n",
       "HongKong      NaN\n",
       "London        1.0\n",
       "Mumbai        1.0\n",
       "Sao Paulo     NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series1 - series2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bangladesh     NaN\n",
       "Delhi          NaN\n",
       "HongKong       NaN\n",
       "London         3.0\n",
       "Mumbai        11.0\n",
       "Sao Paulo      NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series1 + series2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bangladesh     NaN\n",
       "Delhi          NaN\n",
       "HongKong       NaN\n",
       "London         2.0\n",
       "Mumbai        30.0\n",
       "Sao Paulo      NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series1 * series2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating DataFrame from a dictionary of Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Name</th>\n",
       "      <th>Nationality</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>28</td>\n",
       "      <td>Marcelo</td>\n",
       "      <td>US</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>27</td>\n",
       "      <td>Gabriel</td>\n",
       "      <td>Brazil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>38</td>\n",
       "      <td>Raul</td>\n",
       "      <td>China</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Bangladesh</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Age     Name Nationality\n",
       "a  28  Marcelo          US\n",
       "b  27  Gabriel      Brazil\n",
       "c  38     Raul       China\n",
       "d  24      NaN  Bangladesh"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = {'Name': pd.Series(['Marcelo', 'Gabriel', 'Raul'], index=['a', 'b', 'c']),\n",
    "      'Age': pd.Series(['28', '27', '38', '24'], index=['a', 'b', 'c', 'd']),\n",
    "      'Nationality': pd.Series(['US', 'Brazil', 'China', 'Bangladesh'], index=['a', 'b', 'c', 'd'])\n",
    "     }\n",
    "pd.DataFrame(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating DataFrame from a dictionary of list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>name</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Lagos</th>\n",
       "      <td>30</td>\n",
       "      <td>George</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dubai</th>\n",
       "      <td>24</td>\n",
       "      <td>Ann</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mumbai</th>\n",
       "      <td>50</td>\n",
       "      <td>Tino</td>\n",
       "      <td>2015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Accra</th>\n",
       "      <td>21</td>\n",
       "      <td>Charles</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yuma</th>\n",
       "      <td>23</td>\n",
       "      <td>Phil</td>\n",
       "      <td>2014</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        age     name  year\n",
       "Lagos    30   George  2012\n",
       "Dubai    24      Ann  2012\n",
       "Mumbai   50     Tino  2015\n",
       "Accra    21  Charles  2020\n",
       "Yuma     23     Phil  2014"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = {'name': ['George', 'Ann', 'Tino', 'Charles', 'Phil'],\n",
    "        'age': [30, 24, 50, 21, 23],\n",
    "        'year': [2012, 2012, 2015, 2020, 2014]\n",
    "       }\n",
    "data_frame = pd.DataFrame(data, index=['Lagos', 'Dubai', 'Mumbai', 'Accra', 'Yuma'])\n",
    "data_frame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Selecting columns from DataFrames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Lagos      George\n",
       "Dubai         Ann\n",
       "Mumbai       Tino\n",
       "Accra     Charles\n",
       "Yuma         Phil\n",
       "Name: name, dtype: object"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame['name']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Lagos      George\n",
       "Dubai         Ann\n",
       "Mumbai       Tino\n",
       "Accra     Charles\n",
       "Yuma         Phil\n",
       "Name: name, dtype: object"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(data_frame.name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Selecting multiple columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Lagos</th>\n",
       "      <td>George</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dubai</th>\n",
       "      <td>Ann</td>\n",
       "      <td>2012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mumbai</th>\n",
       "      <td>Tino</td>\n",
       "      <td>2015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Accra</th>\n",
       "      <td>Charles</td>\n",
       "      <td>2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yuma</th>\n",
       "      <td>Phil</td>\n",
       "      <td>2014</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           name  year\n",
       "Lagos    George  2012\n",
       "Dubai       Ann  2012\n",
       "Mumbai     Tino  2015\n",
       "Accra   Charles  2020\n",
       "Yuma       Phil  2014"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame[['name', 'year']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating a new columns from a Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>name</th>\n",
       "      <th>year</th>\n",
       "      <th>points</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Lagos</th>\n",
       "      <td>30</td>\n",
       "      <td>George</td>\n",
       "      <td>2012</td>\n",
       "      <td>674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dubai</th>\n",
       "      <td>24</td>\n",
       "      <td>Ann</td>\n",
       "      <td>2012</td>\n",
       "      <td>3435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mumbai</th>\n",
       "      <td>50</td>\n",
       "      <td>Tino</td>\n",
       "      <td>2015</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Accra</th>\n",
       "      <td>21</td>\n",
       "      <td>Charles</td>\n",
       "      <td>2020</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yuma</th>\n",
       "      <td>23</td>\n",
       "      <td>Phil</td>\n",
       "      <td>2014</td>\n",
       "      <td>222</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        age     name  year  points\n",
       "Lagos    30   George  2012     674\n",
       "Dubai    24      Ann  2012    3435\n",
       "Mumbai   50     Tino  2015      34\n",
       "Accra    21  Charles  2020      10\n",
       "Yuma     23     Phil  2014     222"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame['points'] = pd.Series([674, 3435, 34, 10, 222], index=['Lagos', 'Dubai', 'Mumbai', 'Accra', 'Yuma'])\n",
    "data_frame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Removing rows/columns from a DataFrame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Delete column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>name</th>\n",
       "      <th>points</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Lagos</th>\n",
       "      <td>30</td>\n",
       "      <td>George</td>\n",
       "      <td>674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dubai</th>\n",
       "      <td>24</td>\n",
       "      <td>Ann</td>\n",
       "      <td>3435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mumbai</th>\n",
       "      <td>50</td>\n",
       "      <td>Tino</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Accra</th>\n",
       "      <td>21</td>\n",
       "      <td>Charles</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yuma</th>\n",
       "      <td>23</td>\n",
       "      <td>Phil</td>\n",
       "      <td>222</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        age     name  points\n",
       "Lagos    30   George     674\n",
       "Dubai    24      Ann    3435\n",
       "Mumbai   50     Tino      34\n",
       "Accra    21  Charles      10\n",
       "Yuma     23     Phil     222"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.drop('year', axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Delete row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>name</th>\n",
       "      <th>year</th>\n",
       "      <th>points</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Lagos</th>\n",
       "      <td>30</td>\n",
       "      <td>George</td>\n",
       "      <td>2012</td>\n",
       "      <td>674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dubai</th>\n",
       "      <td>24</td>\n",
       "      <td>Ann</td>\n",
       "      <td>2012</td>\n",
       "      <td>3435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Accra</th>\n",
       "      <td>21</td>\n",
       "      <td>Charles</td>\n",
       "      <td>2020</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yuma</th>\n",
       "      <td>23</td>\n",
       "      <td>Phil</td>\n",
       "      <td>2014</td>\n",
       "      <td>222</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       age     name  year  points\n",
       "Lagos   30   George  2012     674\n",
       "Dubai   24      Ann  2012    3435\n",
       "Accra   21  Charles  2020      10\n",
       "Yuma    23     Phil  2014     222"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.drop('Mumbai', axis=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Delete definitively"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_frame.drop('Mumbai', axis=0, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>name</th>\n",
       "      <th>year</th>\n",
       "      <th>points</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Lagos</th>\n",
       "      <td>30</td>\n",
       "      <td>George</td>\n",
       "      <td>2012</td>\n",
       "      <td>674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dubai</th>\n",
       "      <td>24</td>\n",
       "      <td>Ann</td>\n",
       "      <td>2012</td>\n",
       "      <td>3435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Accra</th>\n",
       "      <td>21</td>\n",
       "      <td>Charles</td>\n",
       "      <td>2020</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yuma</th>\n",
       "      <td>23</td>\n",
       "      <td>Phil</td>\n",
       "      <td>2014</td>\n",
       "      <td>222</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       age     name  year  points\n",
       "Lagos   30   George  2012     674\n",
       "Dubai   24      Ann  2012    3435\n",
       "Accra   21  Charles  2020      10\n",
       "Yuma    23     Phil  2014     222"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Selecting Rows in a DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "age         24\n",
       "name       Ann\n",
       "year      2012\n",
       "points    3435\n",
       "Name: Dubai, dtype: object"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.loc['Dubai']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "age         23\n",
       "name      Phil\n",
       "year      2014\n",
       "points     222\n",
       "Name: Yuma, dtype: object"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.iloc[3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>name</th>\n",
       "      <th>year</th>\n",
       "      <th>points</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Lagos</th>\n",
       "      <td>30</td>\n",
       "      <td>George</td>\n",
       "      <td>2012</td>\n",
       "      <td>674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dubai</th>\n",
       "      <td>24</td>\n",
       "      <td>Ann</td>\n",
       "      <td>2012</td>\n",
       "      <td>3435</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       age    name  year  points\n",
       "Lagos   30  George  2012     674\n",
       "Dubai   24     Ann  2012    3435"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.iloc[[0, 1]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### To select the subset of a row and column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'George'"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.loc['Lagos', 'name']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>points</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Lagos</th>\n",
       "      <td>George</td>\n",
       "      <td>674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Accra</th>\n",
       "      <td>Charles</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          name  points\n",
       "Lagos   George     674\n",
       "Accra  Charles      10"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.loc[['Lagos', 'Accra'], ['name', 'points']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Conditional selection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_frame = pd.DataFrame(data=np.random.randn(5, 4),\n",
    "                          index=['A', 'B', 'C', 'D', 'E'],\n",
    "                          columns=['W', 'X', 'Y', 'Z'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>W</th>\n",
       "      <th>X</th>\n",
       "      <th>Y</th>\n",
       "      <th>Z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>0.345001</td>\n",
       "      <td>0.170481</td>\n",
       "      <td>1.536828</td>\n",
       "      <td>-1.985748</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>1.517225</td>\n",
       "      <td>0.899145</td>\n",
       "      <td>1.345495</td>\n",
       "      <td>0.127641</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>0.671808</td>\n",
       "      <td>1.059403</td>\n",
       "      <td>-0.516939</td>\n",
       "      <td>-0.491974</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          W         X         Y         Z\n",
       "C  0.345001  0.170481  1.536828 -1.985748\n",
       "D  1.517225  0.899145  1.345495  0.127641\n",
       "E  0.671808  1.059403 -0.516939 -0.491974"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame[data_frame['W'] > 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "C    0.170481\n",
       "D    0.899145\n",
       "E    1.059403\n",
       "Name: X, dtype: float64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame[data_frame['W'] > 0]['X']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>X</th>\n",
       "      <th>Y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>0.170481</td>\n",
       "      <td>1.536828</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>0.899145</td>\n",
       "      <td>1.345495</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>1.059403</td>\n",
       "      <td>-0.516939</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          X         Y\n",
       "C  0.170481  1.536828\n",
       "D  0.899145  1.345495\n",
       "E  1.059403 -0.516939"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame[data_frame['W'] > 0][['X', 'Y']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>W</th>\n",
       "      <th>X</th>\n",
       "      <th>Y</th>\n",
       "      <th>Z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>0.671808</td>\n",
       "      <td>1.059403</td>\n",
       "      <td>-0.516939</td>\n",
       "      <td>-0.491974</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          W         X         Y         Z\n",
       "E  0.671808  1.059403 -0.516939 -0.491974"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame[(data_frame['W'] > 0) & (data_frame['X'] > 1)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Reseting the Index of a DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>W</th>\n",
       "      <th>X</th>\n",
       "      <th>Y</th>\n",
       "      <th>Z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>-0.379437</td>\n",
       "      <td>0.236841</td>\n",
       "      <td>0.142426</td>\n",
       "      <td>-0.148394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>-0.513635</td>\n",
       "      <td>2.581328</td>\n",
       "      <td>-1.391317</td>\n",
       "      <td>0.362272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>0.345001</td>\n",
       "      <td>0.170481</td>\n",
       "      <td>1.536828</td>\n",
       "      <td>-1.985748</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D</td>\n",
       "      <td>1.517225</td>\n",
       "      <td>0.899145</td>\n",
       "      <td>1.345495</td>\n",
       "      <td>0.127641</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>E</td>\n",
       "      <td>0.671808</td>\n",
       "      <td>1.059403</td>\n",
       "      <td>-0.516939</td>\n",
       "      <td>-0.491974</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  index         W         X         Y         Z\n",
       "0     A -0.379437  0.236841  0.142426 -0.148394\n",
       "1     B -0.513635  2.581328 -1.391317  0.362272\n",
       "2     C  0.345001  0.170481  1.536828 -1.985748\n",
       "3     D  1.517225  0.899145  1.345495  0.127641\n",
       "4     E  0.671808  1.059403 -0.516939 -0.491974"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Setting the Index of a DataFrame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### First, we create a new Column 'ID'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>W</th>\n",
       "      <th>X</th>\n",
       "      <th>Y</th>\n",
       "      <th>Z</th>\n",
       "      <th>ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>-0.379437</td>\n",
       "      <td>0.236841</td>\n",
       "      <td>0.142426</td>\n",
       "      <td>-0.148394</td>\n",
       "      <td>df1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>-0.513635</td>\n",
       "      <td>2.581328</td>\n",
       "      <td>-1.391317</td>\n",
       "      <td>0.362272</td>\n",
       "      <td>df2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>0.345001</td>\n",
       "      <td>0.170481</td>\n",
       "      <td>1.536828</td>\n",
       "      <td>-1.985748</td>\n",
       "      <td>df3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>1.517225</td>\n",
       "      <td>0.899145</td>\n",
       "      <td>1.345495</td>\n",
       "      <td>0.127641</td>\n",
       "      <td>df4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>0.671808</td>\n",
       "      <td>1.059403</td>\n",
       "      <td>-0.516939</td>\n",
       "      <td>-0.491974</td>\n",
       "      <td>df5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          W         X         Y         Z   ID\n",
       "A -0.379437  0.236841  0.142426 -0.148394  df1\n",
       "B -0.513635  2.581328 -1.391317  0.362272  df2\n",
       "C  0.345001  0.170481  1.536828 -1.985748  df3\n",
       "D  1.517225  0.899145  1.345495  0.127641  df4\n",
       "E  0.671808  1.059403 -0.516939 -0.491974  df5"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame['ID'] = ['df1', 'df2', 'df3', 'df4', 'df5']\n",
    "data_frame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Now, we will set the 'ID' Column to be out index "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>W</th>\n",
       "      <th>X</th>\n",
       "      <th>Y</th>\n",
       "      <th>Z</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>df1</th>\n",
       "      <td>-0.379437</td>\n",
       "      <td>0.236841</td>\n",
       "      <td>0.142426</td>\n",
       "      <td>-0.148394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>df2</th>\n",
       "      <td>-0.513635</td>\n",
       "      <td>2.581328</td>\n",
       "      <td>-1.391317</td>\n",
       "      <td>0.362272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>df3</th>\n",
       "      <td>0.345001</td>\n",
       "      <td>0.170481</td>\n",
       "      <td>1.536828</td>\n",
       "      <td>-1.985748</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>df4</th>\n",
       "      <td>1.517225</td>\n",
       "      <td>0.899145</td>\n",
       "      <td>1.345495</td>\n",
       "      <td>0.127641</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>df5</th>\n",
       "      <td>0.671808</td>\n",
       "      <td>1.059403</td>\n",
       "      <td>-0.516939</td>\n",
       "      <td>-0.491974</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            W         X         Y         Z\n",
       "ID                                         \n",
       "df1 -0.379437  0.236841  0.142426 -0.148394\n",
       "df2 -0.513635  2.581328 -1.391317  0.362272\n",
       "df3  0.345001  0.170481  1.536828 -1.985748\n",
       "df4  1.517225  0.899145  1.345495  0.127641\n",
       "df5  0.671808  1.059403 -0.516939 -0.491974"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.set_index('ID')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Multi-level index (MultiIndex) and Index Hierarchy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('0 Level', 21),\n",
       " ('0 Level', 22),\n",
       " ('0 Level', 23),\n",
       " ('A Level', 21),\n",
       " ('A Level', 22),\n",
       " ('A Level', 23)]"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "outside = ['0 Level', '0 Level', '0 Level', 'A Level', 'A Level', 'A Level']\n",
    "inside = [21, 22, 23, 21, 22, 23]\n",
    "\n",
    "indexes = list(zip(outside, inside))\n",
    "indexes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Using MultiIndex.from_tuples() function, we will create a multi-leve index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultiIndex(levels=[['0 Level', 'A Level'], [21, 22, 23]],\n",
       "           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "indexes = pd.MultiIndex.from_tuples(indexes)\n",
    "indexes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Finally, let's convert out multi-leve data into a DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">0 Level</th>\n",
       "      <th>21</th>\n",
       "      <td>-0.713027</td>\n",
       "      <td>1.287223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1.357808</td>\n",
       "      <td>-0.777819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0.283293</td>\n",
       "      <td>1.388174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">A Level</th>\n",
       "      <th>21</th>\n",
       "      <td>-0.867373</td>\n",
       "      <td>-1.388114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>-0.421166</td>\n",
       "      <td>0.142394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0.240072</td>\n",
       "      <td>2.179087</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B\n",
       "0 Level 21 -0.713027  1.287223\n",
       "        22  1.357808 -0.777819\n",
       "        23  0.283293  1.388174\n",
       "A Level 21 -0.867373 -1.388114\n",
       "        22 -0.421166  0.142394\n",
       "        23  0.240072  2.179087"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame = pd.DataFrame(np.random.randn(6, 2), index=indexes, columns=['A', 'B'])\n",
    "data_frame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Let's grab everything undex '0 Level'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>-0.713027</td>\n",
       "      <td>1.287223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1.357808</td>\n",
       "      <td>-0.777819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0.283293</td>\n",
       "      <td>1.388174</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           A         B\n",
       "21 -0.713027  1.287223\n",
       "22  1.357808 -0.777819\n",
       "23  0.283293  1.388174"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.loc['0 Level']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### We can go futher to grab the data in '21'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A   -0.713027\n",
       "B    1.287223\n",
       "Name: 21, dtype: float64"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.loc['0 Level'].loc[21]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### We can give them names using .index.names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Levels</th>\n",
       "      <th>Num</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">0 Level</th>\n",
       "      <th>21</th>\n",
       "      <td>-0.713027</td>\n",
       "      <td>1.287223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1.357808</td>\n",
       "      <td>-0.777819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0.283293</td>\n",
       "      <td>1.388174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">A Level</th>\n",
       "      <th>21</th>\n",
       "      <td>-0.867373</td>\n",
       "      <td>-1.388114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>-0.421166</td>\n",
       "      <td>0.142394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0.240072</td>\n",
       "      <td>2.179087</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    A         B\n",
       "Levels  Num                    \n",
       "0 Level 21  -0.713027  1.287223\n",
       "        22   1.357808 -0.777819\n",
       "        23   0.283293  1.388174\n",
       "A Level 21  -0.867373 -1.388114\n",
       "        22  -0.421166  0.142394\n",
       "        23   0.240072  2.179087"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_frame.index.names = ['Levels', 'Num']\n",
    "data_frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

以上是关于text 快速浏览Pandas for Data Science的主要内容,如果未能解决你的问题,请参考以下文章

用pandas_profiling快速探索数据,算不算EDA(Exploratory Data Analysis)首选工具

text 快速浏览

pandas_profiling

pandas中DataFrame逐行读取的方法

numpy 或 pandas groupby 方式替换 2 个 for 循环

python pandas 实用操作案例