In [14]:
# Import all libraries needed for the tutorial
# General syntax to import specific functions in a library:
##from (library) import (specific library function)
from pandas import DataFrame, read_csv
# General syntax to import a library but no functions:
##import (library) as (give the library a nickname/alias)
import matplotlib.pyplot as plt
import pandas as pd #this is how I usually import pandas
import sys #only needed to determine Python version number
# Enable inline plotting
%matplotlib inline
%pylab inline
# Must get this or you will get # NameError: name 'figsize' is not defined
import matplotlib.pylab
pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier
figsize(15, 5)
print 'Python version ' + sys.version
print 'Pandas version ' + pd.__version__
sql
In [15]:
# Open the PostgreSQL connection that the query cells pass to read_sql.
import pandas.io.sql
import psycopg2
# Only the user name is given here; presumably the remaining connection
# settings (database, host, password) come from client defaults -- TODO confirm.
conn = psycopg2.connect(user='lab')
# Cursor on the same connection; the cells shown in this section do not use it.
cur = conn.cursor()
# NOTE(review): the next line looks truncated by the notebook export
# ("..." is not valid in a print statement) -- confirm against the original.
print 0 ...
In [16]:
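# The query cells in this notebook all use `conn`, so it is left open here.
# Uncomment and run the line below once you are finished querying:
# conn.close()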
Populating the interactive namespace from numpy and matplotlib
Python version 2.7.6 (default, Mar 22 2014, 22:59:56)
[GCC 4.8.2]
Pandas version 0.16.0
In [25]:
# Plot the average bike count against time. Passing x='when_ts' puts the
# timestamp on the x-axis; selecting both columns and calling .plot() with no
# arguments would use the row index as the x-axis instead.
df.plot(x='when_ts', y='avg_bike_num')
In [26]:
from matplotlib.font_manager import FontProperties, findfont

# Describe the wanted font (a plain, medium-sized monospace face) ...
fp = FontProperties(**{
    'family': 'monospace',
    'style': 'normal',
    'variant': 'normal',
    'weight': 'normal',
    'stretch': 'normal',
    'size': 'medium',
})
# ... and ask matplotlib's font manager which font it resolves to.
font = findfont(fp)
In [27]:
# Plot the average bike count against time. Passing x='when_ts' puts the
# timestamp on the x-axis; selecting both columns and calling .plot() with no
# arguments would use the row index as the x-axis instead.
df.plot(x='when_ts', y='avg_bike_num')
Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3755f87110>
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f37560d2050>
In [28]:
# Kernel density estimate of the average bike count. Only the numeric column
# is passed: a density estimate of the timestamp column is not meaningful, so
# it is no longer included in the selection.
df[['avg_bike_num']].plot(kind='kde')
In [16]:
# Fetch the ubike rows for one station name whose timestamp lies between
# 2014-12-08 and 2014-12-09, sorted by timestamp.
# NOTE(review): the station-name literal looks blank here; presumably non-ASCII
# text was lost in the export -- confirm against the original notebook.
sql = """
select * from ubike where name = ' 'and (when_ts BETWEEN '2014-12-08' AND
'2014-12-09')
order by when_ts;
"""
ponit2_df = pd.read_sql(sql, conn)
# Number of rows returned (shown as the cell output).
ponit2_df.shape[0]
In [17]:
# Peek at the first row of the query result.
ponit2_df.head(1)
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3756096910>
Out[16]:
10
Out[17]:
when_ts where_pt code name area_name space_num avg_bike_num max_bike_
0
2014-
12-08
15:00:00
(25.048268,121.552278) 18 38 13.5 15
In [121]:
from pandas.tools.plotting import table

# Columns that are both summarised in the table and drawn as lines.
space_cols = ['space_num', 'max_space_num', 'min_space_num']

fig, ax = plt.subplots()
# describe() statistics rounded to two decimals, drawn as a table in the
# upper-left corner of the axes.
summary = np.round(df[space_cols].describe(), 2)
table(ax, summary, loc='upper left', colWidths=[0.1, 0.1, 0.1])
# Line plot on the same axes; table=True also draws the plotted data as a table.
df[space_cols].plot(table=True, ax=ax)
sql
In [124]:
# Self-join ubike on the timestamp so that each row carries the average bike
# count of two stations (point_A, point_B) at the same time, restricted to
# 2014-12-08 .. 2014-12-09.
# NOTE(review): both station-name literals look blank here; presumably
# non-ASCII text was lost in the export -- confirm against the original.
sql = """
select a.when_ts as time ,
a.avg_bike_num as point_A,
b.avg_bike_num as point_B
from ubike a
inner join ubike b on
a.when_ts = b.when_ts
and (a.when_ts BETWEEN '2014-12-08' AND '2014-12-09')
and (a.name = ' ' and b.name = ' ');
"""
PointJoin = pd.read_sql(sql, conn)
# Number of joined rows (shown as the cell output).
PointJoin.shape[0]
Out[121]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fba80348ad0>
Out[124]:
10
In [32]:
# List the distinct station names whose area_name matches the LIKE pattern
# (group by name collapses duplicates).
sql = """
select name from ubike where area_name like '% %' group by name
"""
pointA = pd.read_sql(sql, conn)
# Number of distinct names (shown as the cell output).
pointA.shape[0]
In [33]:
# Display the whole `pointA` frame as the cell output.
pointA
Out[32]:
14
Out[33]:
name
0
1
2 (2 )
3
4
5 (2 )
6
7
8
9
10
11 (2 )
12
13
In [90]:
# Load the ubike rows for one station between 2014-12-25 and 2014-12-31.
# The start-date literal had been broken across two lines, which put a newline
# inside the quoted value; it is rejoined here as '2014-12-25'.
# NOTE(review): the station-name literal looks blank; presumably non-ASCII
# text was lost in the export -- confirm against the original notebook.
sql = """
select * from ubike
where name = ' '
and (when_ts BETWEEN '2014-12-25' AND '2014-12-31')
"""
pointA = pandas.io.sql.read_sql(sql, conn)
# Number of rows returned (shown as the cell output).
len(pointA)
In [35]:
# Peek at the first row of `pointA`.
pointA.head(1)
In [91]:
# Load the tpweather rows for one station between 2014-12-25 and 2014-12-31.
# The start-date literal had been broken across two lines, which put a newline
# inside the quoted value; it is rejoined here as '2014-12-25'.
# NOTE(review): the station-name literal looks blank; presumably non-ASCII
# text was lost in the export -- confirm against the original notebook.
sql = """
select * from tpweather
where name = ' '
and (when_ts BETWEEN '2014-12-25' AND '2014-12-31');
"""
weaterA = pandas.io.sql.read_sql(sql, conn)
# Number of rows returned (shown as the cell output).
len(weaterA)
In [37]:
# Peek at the first row of `weaterA`.
weaterA.head(1)
Out[90]:
145
Out[35]:
when_ts where_pt code name area_name space_num avg_bike_num max_bike_
0
2014-
12-08
15:00:00
(25.116325,121.534136) 123 44 31 33
Out[91]:
145
Out[37]:
when_ts where_pt name temp max_temp min_temp hum_pct pressure win
0
2014-
12-19
(25.1180133,121.5373439) 17.3889 17.4 16.9 76 1022.38 2.7
sub select
In [92]:
# Load the tpweather rows for one station between 2014-12-25 and 2014-12-31,
# keeping only timestamps that also occur in ubike for a station (sub-select),
# sorted by timestamp.
# The final ORDER BY keyword had been broken across two lines ("ord" / "er by"),
# which made the statement invalid SQL; it is rejoined here.
# NOTE(review): both station-name literals look blank; presumably non-ASCII
# text was lost in the export -- confirm against the original notebook.
sql = """
select * from tpweather where when_ts in
(select when_ts
from ubike
where name = ' ' order by when_ts )
and name = ' '
and (when_ts BETWEEN '2014-12-25' AND '2014-12-31' )
order by when_ts
"""
weatherA = pandas.io.sql.read_sql(sql, conn)
# Number of rows returned (shown as the cell output).
len(weatherA)
In [93]:
# Row count of the bike frame, for comparison with the weather frame.
pointA.shape[0]
In [94]:
# Row count of the weather frame, for comparison with the bike frame.
weatherA.shape[0]
Out[92]:
145
Out[93]:
145
Out[94]:
145