In [2]:
 
import numpy as np
import pandas as pd
import matplotlib
from ggplot import diamonds
matplotlib.style.use("ggplot")  # Apply the ggplot look to matplotlib figures
In [4]:
 
diamonds.shape  # Check data shape: (number of rows, number of columns)
Out[4]:
(53940, 10)
In [6]:
 
diamonds.head()  # Preview the first five rows (five is head()'s default)
Out[6]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
In [9]:
x
%matplotlib inline
diamonds.hist(column="carat",        # Column to plot
              figsize=(8,8),         # Plot size
              color="blue")          # Plot color
Out[9]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000BD44CF8>]], dtype=object)
In [11]:
# Carat histogram again, with finer bins and only values in [0, 3.5] binned.
diamonds.hist(
    column="carat",     # Column to plot
    bins=50,            # Use 50 bins
    range=(0, 3.5),     # Limit x-axis range
    figsize=(8, 8),     # Plot size
    color="blue",       # Plot color
)
Out[11]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000BE8EAC8>]], dtype=object)
In [13]:
# Select the diamonds weighing more than 3.5 carats.
diamonds.loc[diamonds["carat"] > 3.5]
Out[13]:
carat cut color clarity depth table price x y z
23644 3.65 Fair H I1 67.1 53 11668 9.53 9.48 6.38
25998 4.01 Premium I I1 61.0 61 15223 10.14 10.10 6.17
25999 4.01 Premium J I1 62.5 62 15223 10.02 9.94 6.24
26444 4.00 Very Good I I1 63.3 58 15984 10.01 9.94 6.31
26534 3.67 Premium I I1 62.4 56 16193 9.86 9.81 6.13
27130 4.13 Fair H I1 64.8 61 17329 10.00 9.85 6.43
27415 5.01 Fair J I1 65.5 59 18018 10.74 10.54 6.98
27630 4.50 Fair J I1 65.8 58 18531 10.23 10.16 6.72
27679 3.51 Premium J VS2 62.5 59 18701 9.66 9.63 6.03
In [16]:
# Box plot of carat weight; return_type="axes" asks for the matplotlib Axes back.
diamonds.boxplot(column="carat", return_type="axes")
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0xc71e630>
In [18]:
# Side-by-side box plots of price, one box per clarity grade.
diamonds.boxplot(
    column="price",     # Column to plot
    by="clarity",       # Column to split upon
    figsize=(8, 8),     # Figure size
)
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0xcb0b4e0>
In [20]:
 
# Side-by-side box plots of carat weight, one box per clarity grade.
diamonds.boxplot(
    column="carat",     # Column to plot
    by="clarity",       # Column to split upon
    figsize=(8, 8),     # Figure size
)
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0xcebc9e8>
In [22]:
# Smoothed (kernel density) view of the carat distribution.
diamonds["carat"].plot(
    kind="density",     # Create density plot
    xlim=(0, 5),        # Limit x axis values
    figsize=(8, 8),     # Set figure size
)
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0xcb96da0>
In [24]:
# One-way frequency table: number of diamonds in each clarity grade.
# NOTE(review): the variable is called carat_table, but it is built from the
# "clarity" column.
carat_table = pd.crosstab(
    index=diamonds["clarity"],
    columns="count",
)
carat_table
Out[24]:
col_0 count
clarity
I1 741
IF 1790
SI1 13065
SI2 9194
VS1 8171
VS2 12258
VVS1 3655
VVS2 5066
In [27]:
# Bar chart with one bar per row of the frequency table.
carat_table.plot(figsize=(8, 8), kind="bar")
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0xf6ff908>
In [29]:
# Two-way frequency table: clarity grades as rows, colors as columns.
# Assigning to carat_table replaces whatever was previously bound to that name.
carat_table = pd.crosstab(
    index=diamonds["clarity"],
    columns=diamonds["color"],
)
carat_table
Out[29]:
color D E F G H I J
clarity
I1 42 102 143 150 162 92 50
IF 73 158 385 681 299 143 51
SI1 2083 2426 2131 1976 2275 1424 750
SI2 1370 1713 1609 1548 1563 912 479
VS1 705 1281 1364 2148 1169 962 542
VS2 1697 2470 2201 2347 1643 1169 731
VVS1 252 656 734 999 585 355 74
VVS2 553 991 975 1443 608 365 131
In [31]:
# Stacked bars: each bar is one table row, segmented by the table's columns.
carat_table.plot(
    kind="bar",
    stacked=True,
    figsize=(8, 8),
)
Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0xd168208>
In [33]:
# Grouped (side-by-side) bars: same table, columns drawn next to each other.
carat_table.plot(
    kind="bar",
    stacked=False,
    figsize=(8, 8),
)
Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0xd7d3550>
In [35]:
 
# Price against carat weight, with the y axis limited to 0-20000.
diamonds.plot(
    kind="scatter",     # Create a scatterplot
    x="carat",          # Put carat on the x axis
    y="price",          # Put price on the y axis
    ylim=(0, 20000),
    figsize=(10, 10),
)
Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e45cf8>
In [36]:
# Create some data: one synthetic reading per year for 1950-2015.
# A locally seeded generator keeps the series identical on every re-run
# without touching NumPy's global random state.
rng = np.random.RandomState(42)
years = list(range(1950, 2016))
# Each reading is (year - 1900) plus uniform noise drawn from [0, 20).
readings = [(y + rng.uniform(0, 20) - 1900) for y in years]
time_df = pd.DataFrame({"year": years,
                        "readings": readings})
# Plot the data as a line: year on the x axis, readings on the y axis.
time_df.plot(x="year",
             y="readings",
             figsize=(9, 9))
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x1134c1d0>