Let's import the modules we'll need:
import sqlite3
import datetime
import pandas as pd
import numpy as np
First, open our database for the queries:
database_path = 'databases/forsale2_hashed.db'
con = sqlite3.connect(database_path)
cur = con.cursor()
Query when the last and prior database updates were:
res = cur.execute("SELECT dt_of_update FROM database_update ORDER BY dt_of_update DESC LIMIT 2")
db_updates = res.fetchall()
dt_last_db_update = db_updates[0][0]
dt_prior_db_update = db_updates[1][0]
Query how many current listings there are:
res = cur.execute("SELECT * FROM addresses NATURAL JOIN dates WHERE dates.dt_last_seen > ?", (dt_prior_db_update,))
actives = res.fetchall()
num_current = len(actives)
And, finally, let's query the total number of listings that have ever been in the database and then close the connection to the database:
res = cur.execute("SELECT COUNT(*) from addresses")
result = res.fetchall()
num_total_properties = result[0][0]
con.close()
Now, let's put all that information into a pretty table and display it. First we'll define a helper function that makes it easy to quickly make a simple table in Pandas with the styling we want:
def pretty_table(list_of_lists, caption='', columns=[], pctcol=''):
    df = pd.DataFrame(list_of_lists)
    if columns != []:
        df.columns = columns
    if pctcol != '':
        total = df[pctcol].sum()
        df['Pct'] = np.round(100 * df[pctcol] / total, 2)
    dfStyler = df.style
    dfStyler.set_table_styles([{'selector': 'caption', 'props': [('text-align', 'center'), ('font-size', '150%'), ('color', 'black')]},
                               {'selector': 'th', 'props': [('text-align', 'left')]},
                               {'selector': 'td', 'props': 'text-align: left; font-weight: bold;'},
                               ], overwrite=False)
    if pctcol != '':
        dfStyler.format({'Pct': '{:.2f}'})
    dfStyler.set_caption(caption)
    dfStyler.hide(axis='index')
    if columns == []:
        dfStyler.hide(axis='columns')
    display(dfStyler)
And now, display it:
pretty_table([['Date / Time of this analysis', str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))],
['Date / Time of last DB update', dt_last_db_update],
['Number of properties in DB', num_total_properties],
['Number of current listings', num_current]
], caption = 'Database Summary')
| Date / Time of this analysis | 2023-11-13 11:34:13 |
| Date / Time of last DB update | 2023-11-03 22:09:30 |
| Number of properties in DB | 7670 |
| Number of current listings | 4780 |
We now want to query the tables in the database and make a dataframe out of the current listings. We can do that with the following SQL query directly into a pandas dataframe:
# Current listings are anything last seen after the prior database update
con = sqlite3.connect(database_path)
# Parameter binding is safer and cleaner than interpolating the date into the query string
query = "SELECT * FROM addresses NATURAL JOIN dates WHERE dates.dt_last_seen > ?"
actives_df = pd.read_sql(query, con, params=(dt_prior_db_update,))
con.close()
Let's take a look at the first few records of our active listing dataset:
actives_df.set_index('pid', inplace=True)
actives_df.head(5).style.hide(axis='index')
| area | address | dwelling_type | dwelling_subtype | new_build | beds | baths | latitude | longitude | dt_first_listed | dt_first_seen | dt_last_seen |
|---|---|---|---|---|---|---|---|---|---|---|---|
| eh1 | Demo version: address hidden for privacy | 3 bedroom apartment for sale | Apartment | 1 | 3 | 2 | 55.95484 | -3.18843 | 2023-08-17 11:27:13 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 |
| eh1 | Demo version: address hidden for privacy | 7 bedroom town house for sale | Town House | 0 | 7 | None | 55.958904 | -3.187063 | 2023-04-28 15:09:46 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 |
| eh1 | Demo version: address hidden for privacy | 5 bedroom mews house for sale | Mews | 0 | 5 | 4 | 55.956736 | -3.191266 | 2023-08-24 10:14:06 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 |
| eh1 | Demo version: address hidden for privacy | 2 bedroom apartment for sale | Apartment | 1 | 2 | 2 | 55.95484 | -3.18843 | 2023-08-10 00:36:25 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 |
| eh1 | Demo version: address hidden for privacy | 2 bedroom apartment for sale | Apartment | 1 | 2 | 2 | 55.95484 | -3.18843 | 2023-08-10 00:36:23 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 |
Some notes: I've hidden the addresses for privacy reasons. Data collection started on 24th August, so for many properties the date first seen is that date even though they were first listed earlier. The date last seen is the date of this blog-post update.
Now we are going to want to add some features. First, we want a further higher-level dwelling type that categorises all the different dwelling sub-types into just Houses, Flats or Other. Let's take a look at all the different ways these are described in the listings:
actives_df['dwelling_subtype'].unique()
array(['Apartment', 'Town House', 'Mews', 'Flat', 'Duplex', 'Maisonette',
'Ground Flat', 'Parking', 'Studio', 'Terraced', 'Semi-Detached',
'Not Specified', 'Plot', 'Retirement Property', 'Garages',
'Detached', 'Penthouse', 'Detached Bungalow', 'End of Terrace',
'House', 'Villa', 'Terraced Bungalow', 'Semi-Detached Bungalow',
'Ground Maisonette', 'Bungalow', 'Land', 'Link Detached House',
'Hotel Room', 'Cottage', 'Triplex', 'Detached Villa',
'Sheltered Housing', 'Park Home', 'Farm Land',
'Semi-detached Villa'], dtype=object)
We'll make two strings that respectively contain all the terms being used for Flats and Houses (we note that this requires some domain expertise). Anything that is not a Flat or a House will be given the category 'Other'. We'll call the new feature 'dwelling_cat', as follows:
flats_str = 'Flat|Apartment|Duplex|Ground Flat|Maisonette|Studio|Penthouse|Ground Maisonette|Triplex'
# Built with implicit string concatenation so no stray spaces from the line continuation end up in the regex
houses_str = ('Town House|Mews|Terraced|Semi-Detached|End of Terrace|Detached|House|Semi-Detached Bungalow|Detached Villa|'
              'Detached Bungalow|Villa|Bungalow|Terraced Bungalow|Cottage|Link Detached House|Semi-detached Villa')
# If it's not a Flat, test if it's a House and if not, it's an Other
actives_df['dwelling_cat'] = np.where(actives_df['dwelling_subtype'].str.contains(flats_str), 'Flat', \
np.where(actives_df['dwelling_subtype'].str.contains(houses_str), 'House', 'Other'))
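As a quick sanity check, the nested `np.where` pattern behaves like a two-stage if/elif on the whole series. Here it is on a toy series with shortened match strings (hypothetical data, not the full lists above):

```python
import numpy as np
import pandas as pd

# Toy check of the nested np.where categorisation (shortened match strings)
s = pd.Series(['Apartment', 'Detached Bungalow', 'Plot'])
cat = np.where(s.str.contains('Flat|Apartment'), 'Flat',
               np.where(s.str.contains('Detached|Bungalow'), 'House', 'Other')).tolist()
print(cat)  # ['Flat', 'House', 'Other']
```

Note that `str.contains` does substring matching, which is why e.g. 'Ground Flat' is caught by the 'Flat' term without needing its own entry.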
Now we want to add a days-on-market feature which we will call 'dom'. It's the difference between the last date a property was seen online and the first date it was listed. Strictly speaking, this is only valid for those properties still for sale but we'll compute it for all (even those that are no longer online). The 12-hour timedelta is to force the computation to round to the nearest day.
actives_df['dom'] = (pd.to_datetime(actives_df['dt_last_seen']) - \
pd.to_datetime(actives_df['dt_first_listed']) + \
datetime.timedelta(hours=12)).dt.days
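The effect of the 12-hour offset can be seen on a toy pair of timestamps (hypothetical dates): `.dt.days` truncates the timedelta, so adding 12 hours first turns truncation into round-to-nearest-day.

```python
import datetime
import pandas as pd

# A listing online for 8 days 20 hours: .dt.days truncates to 8,
# but adding 12 hours first rounds to the nearest whole day, giving 9
first_listed = pd.to_datetime(pd.Series(['2023-08-01 02:00:00']))
last_seen = pd.to_datetime(pd.Series(['2023-08-09 22:00:00']))
print((last_seen - first_listed).dt.days[0])                                 # 8
print((last_seen - first_listed + datetime.timedelta(hours=12)).dt.days[0])  # 9
```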
# Convert beds to integer (unlike baths, beds has no missing values here, so the cast is safe)
actives_df['beds'] = actives_df['beds'].astype('int')
Now we want to add the current price and current price type for each of the records in the dataframe. This involves querying the prices table in the database and then performing a GroupBy function to group the prices data by property id (pid). The values we want are the last entries in the GroupBy table (the current ones). First, let's get the pricing history data for all the current listings in the database:
con = sqlite3.connect(database_path)
# Parameter binding, as before, rather than string interpolation
query = "SELECT * FROM prices NATURAL JOIN dates WHERE dates.dt_last_seen > ?"
listings_prices_df = pd.read_sql(query, con, params=(dt_prior_db_update,))
listings_prices_df.set_index('pid', inplace=True)
con.close()
And then define the new features as follows:
actives_df['current_price_type'] = listings_prices_df.groupby('pid').tail(1)['price_type']
actives_df['current_price'] = listings_prices_df.groupby('pid').tail(1)['price']
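To see why `tail(1)` gives the current values, here's a toy price history (hypothetical pids and prices). The key assumption is that rows in the prices table are appended in chronological order, so the last row per pid is the latest one:

```python
import pandas as pd

# Toy price history, assumed appended in chronological order within each pid
hist = pd.DataFrame({'pid': [1, 1, 2],
                     'price': ['250000', '240000', '310000']}).set_index('pid')
current = hist.groupby('pid').tail(1)['price']
print(current.to_dict())  # {1: '240000', 2: '310000'}
```

Because the result keeps pid as its index, assigning it to a column of `actives_df` (also indexed by pid) aligns the values automatically.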
# There's a small number of entries that have something other than a price - e.g. 'Coming Soon' or 'POA'
# They aren't of use to us so we'll remove them
print('Number of non-priced properties:', len(actives_df[~actives_df['current_price'].str.isdigit()]))
actives_df = actives_df[actives_df['current_price'].str.isdigit()]
actives_df['current_price'] = actives_df['current_price'].astype('int')
Number of non-priced properties: 14
Now do the same for the listings status:
con = sqlite3.connect(database_path)
# Parameter binding, as before, rather than string interpolation
query = "SELECT * FROM status NATURAL JOIN dates WHERE dates.dt_last_seen > ?"
listings_status_df = pd.read_sql(query, con, params=(dt_prior_db_update,))
listings_status_df.set_index('pid', inplace=True)
con.close()
actives_df['current_status'] = listings_status_df.groupby('pid').tail(1)['status']
Now let's take a look at the dataframe with our new features:
actives_df.head(5).style.hide(axis='index')
| area | address | dwelling_type | dwelling_subtype | new_build | beds | baths | latitude | longitude | dt_first_listed | dt_first_seen | dt_last_seen | dwelling_cat | dom | current_price_type | current_price | current_status |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| eh1 | Demo version: address hidden for privacy | 3 bedroom apartment for sale | Apartment | 1 | 3 | 2 | 55.95484 | -3.18843 | 2023-08-17 11:27:13 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 | Flat | 78 | Fixed Price | 1495000 | Active |
| eh1 | Demo version: address hidden for privacy | 7 bedroom town house for sale | Town House | 0 | 7 | None | 55.958904 | -3.187063 | 2023-04-28 15:09:46 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 | House | 189 | Offers Over | 1250000 | Under offer |
| eh1 | Demo version: address hidden for privacy | 5 bedroom mews house for sale | Mews | 0 | 5 | 4 | 55.956736 | -3.191266 | 2023-08-24 10:14:06 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 | House | 71 | Offers Over | 1125000 | Active |
| eh1 | Demo version: address hidden for privacy | 2 bedroom apartment for sale | Apartment | 1 | 2 | 2 | 55.95484 | -3.18843 | 2023-08-10 00:36:25 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 | Flat | 86 | Fixed Price | 1075000 | Under offer |
| eh1 | Demo version: address hidden for privacy | 2 bedroom apartment for sale | Apartment | 1 | 2 | 2 | 55.95484 | -3.18843 | 2023-08-10 00:36:23 | 2023-08-24 18:45:17 | 2023-11-03 18:18:38 | Flat | 86 | Fixed Price | 1005000 | Active |
Let's look at how the property types break down by category and dwelling sub-type within each category:
df = pd.DataFrame(actives_df['dwelling_cat'].value_counts())
pretty_table([x for x in zip(df.index.values, df['count'].values)],
caption = 'All Properties', columns=['Type','Num'], pctcol='Num')
| Type | Num | Pct |
|---|---|---|
| Flat | 2644 | 55.48 |
| House | 2012 | 42.22 |
| Other | 110 | 2.31 |
df = pd.DataFrame(actives_df[actives_df['dwelling_cat'] == 'Flat']['dwelling_subtype'].value_counts())
pretty_table([x for x in zip(df.index.values, df['count'].values)], \
caption = 'Flats', columns=['Type','Num'], pctcol='Num')
| Type | Num | Pct |
|---|---|---|
| Flat | 1690 | 63.92 |
| Apartment | 446 | 16.87 |
| Ground Flat | 368 | 13.92 |
| Maisonette | 69 | 2.61 |
| Duplex | 32 | 1.21 |
| Penthouse | 26 | 0.98 |
| Studio | 8 | 0.30 |
| Ground Maisonette | 4 | 0.15 |
| Triplex | 1 | 0.04 |
df = pd.DataFrame(actives_df[actives_df['dwelling_cat'] == 'House']['dwelling_subtype'].value_counts())
pretty_table([x for x in zip(df.index.values, df['count'].values)], \
caption = 'Houses', columns=['Type','Num'], pctcol='Num')
| Type | Num | Pct |
|---|---|---|
| Detached | 650 | 32.31 |
| Semi-Detached | 405 | 20.13 |
| Terraced | 354 | 17.59 |
| End of Terrace | 189 | 9.39 |
| Villa | 71 | 3.53 |
| Detached Bungalow | 56 | 2.78 |
| Town House | 53 | 2.63 |
| Detached Villa | 51 | 2.53 |
| House | 43 | 2.14 |
| Semi-Detached Bungalow | 40 | 1.99 |
| Bungalow | 34 | 1.69 |
| Cottage | 26 | 1.29 |
| Mews | 16 | 0.80 |
| Terraced Bungalow | 11 | 0.55 |
| Link Detached House | 10 | 0.50 |
| Semi-detached Villa | 3 | 0.15 |
df = pd.DataFrame(actives_df[actives_df['dwelling_cat'] == 'Other']['dwelling_subtype'].value_counts())
pretty_table([x for x in zip(df.index.values, df['count'].values)], \
caption = 'Other', columns=['Type','Num'], pctcol='Num')
| Type | Num | Pct |
|---|---|---|
| Retirement Property | 59 | 53.64 |
| Garages | 13 | 11.82 |
| Not Specified | 12 | 10.91 |
| Land | 10 | 9.09 |
| Parking | 4 | 3.64 |
| Plot | 4 | 3.64 |
| Park Home | 4 | 3.64 |
| Sheltered Housing | 2 | 1.82 |
| Hotel Room | 1 | 0.91 |
| Farm Land | 1 | 0.91 |
From this point on, let's not include the 'Other' property types as we're not really interested in them.
actives_df = actives_df[actives_df['dwelling_cat'] != 'Other']
Let's take a look at the pricing and listing status for the currently listed properties:
pretty_table([
['Number of properties with Fixed Prices:', len(actives_df[actives_df['current_price_type'].str.contains('Fixed')])],
['Number of properties Offers Over / Around / From / Guide:', len(actives_df[actives_df['current_price_type'].str.contains('Offers|Guide|From')])],
['Number of properties with no price type (usually New Builds):',len(actives_df[~actives_df['current_price_type'].str.contains('Fixed|Offers|Guide|From')])],
], caption = 'Price Type Summary', columns=['','Num'], pctcol='Num')
| | Num | Pct |
|---|---|---|
| Number of properties with Fixed Prices: | 1134 | 24.36 |
| Number of properties Offers Over / Around / From / Guide: | 3254 | 69.89 |
| Number of properties with no price type (usually New Builds): | 268 | 5.76 |
pretty_table([
['Number Active / For Sale:', len(actives_df[actives_df['current_status'] == 'Active'])],
['Number Under Offer:', len(actives_df[actives_df['current_status'] == 'Under offer'])],
['Number Sold STC:', len(actives_df[actives_df['current_status'].str.contains('Sold')])],
['Number Reserved:', len(actives_df[actives_df['current_status'].str.contains('Reserved')])],
], caption = 'Sales Status Summary', columns=['','Num'], pctcol='Num')
| | Num | Pct |
|---|---|---|
| Number Active / For Sale: | 2333 | 50.11 |
| Number Under Offer: | 1256 | 26.98 |
| Number Sold STC: | 1055 | 22.66 |
| Number Reserved: | 12 | 0.26 |
actives_df['current_price'] = actives_df['current_price'].astype('int')
pretty_table([
['Overall Min price:', '£'+str(int(actives_df['current_price'].min()))],
['Overall Max price:', '£'+str(int(actives_df['current_price'].max()))],
['Overall Median price:', '£'+str(int(actives_df['current_price'].median()))],
['Overall Mean price:', '£'+str(int(actives_df['current_price'].mean()))]
],
caption = 'Overall Price Summary'
)
| Overall Min price: | £38000 |
| Overall Max price: | £6000000 |
| Overall Median price: | £290000 |
| Overall Mean price: | £364304 |
Let's look at the distribution of prices for flats vs houses. For this we can use either histograms, or as I have done here, violin plots. I find violin plots easier to compare distributions with as each attribute gets its own y-axis and all three distributions can be drawn using kernel density estimation more compactly in the same chart.
import matplotlib.pyplot as plt
all_prices = actives_df['current_price'] / 1000
house_prices = actives_df[actives_df['dwelling_cat'] == 'House']['current_price'] / 1000
flat_prices = actives_df[actives_df['dwelling_cat'] == 'Flat']['current_price'] / 1000
p = [0, 0.25, 0.5, 0.75, 0.95]
plt.figure(figsize=(10,4))
plt.violinplot([all_prices, house_prices, flat_prices], vert=False, showmedians=False, widths=0.9, quantiles=[p, p, p])
ax = plt.gca()
ax.xaxis.grid(True)
ax.set_yticks([y + 1 for y in range(3)],
labels=['All', 'Houses', 'Flats'])
ax.set_xlabel('Price £k')
plt.title('All vs House vs Flat Prices (Percentiles: 0%, 25%, 50%, 75%, 95%)')
ax.set_xlim(0, 1100)
plt.show()
We see that the distribution for houses is much more spread out than that for flats: the 95th percentile for houses is over £1m, versus just over £600k for flats.
Now let's develop a visualisation of the mean price split out by the number of beds and what price type the properties are - Fixed Price, Offers Over or New Build. We're going to use Bokeh for plotting here as ultimately we will be putting all the visualisations together into an interactive dashboard - something Bokeh is particularly suited to. First, we'll import the bokeh modules we'll need:
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.plotting import figure
from bokeh.transform import dodge
import bokeh.palettes as bp
from bokeh.io import show
from bokeh.io import output_notebook
output_notebook(hide_banner=True)
beds = [str(x) for x in range(1,6)]
mean_prices_fixed = [actives_df[(actives_df['beds'] == x) & (actives_df['current_price_type'].str.contains('Fixed'))]\
['current_price'].mean()/1000 for x in range(1,6)]
mean_prices_offers = [actives_df[(actives_df['beds'] == x) & (actives_df['current_price_type'].str.contains('Offers'))]\
['current_price'].mean()/1000 for x in range(1,6)]
mean_prices_newbuild = [actives_df[(actives_df['beds'] == x) & (actives_df['new_build'] == '1')]\
['current_price'].mean()/1000 for x in range(1,6)]
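The same per-bed means could also be computed with a single `groupby` per price-type subset rather than one list comprehension per bed count. A minimal sketch on toy data (hypothetical values, standing in for e.g. the Fixed Price subset of `actives_df`):

```python
import pandas as pd

# Toy subset: one groupby replaces the per-bed list comprehension
toy = pd.DataFrame({'beds': [1, 1, 2], 'current_price': [200000, 220000, 300000]})
mean_by_beds = toy.groupby('beds')['current_price'].mean() / 1000
print(mean_by_beds.to_dict())  # {1: 210.0, 2: 300.0}
```

The list-comprehension form used above has one advantage for plotting: it guarantees an entry (possibly NaN) for every bed count from 1 to 5, whereas a groupby only yields the bed counts actually present.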
data = {'Beds' : beds,
'Offers' : mean_prices_offers,
'Fixed' : mean_prices_fixed,
'New Build' : mean_prices_newbuild,
}
source = ColumnDataSource(data=data)
p = figure(x_range=beds, title='Mean Price vs Number of Beds over Price Type',
width=800, height=400, toolbar_location=None, tools="")
# Offers over bars
p.vbar(x=dodge('Beds', -0.25, range=p.x_range), top='Offers', source=source,
width=0.2, color=bp.Light6[0], legend_label="Offers Over")
# Fixed price bars
p.vbar(x=dodge('Beds', 0.0, range=p.x_range), top='Fixed', source=source,
width=0.2, color=bp.Light6[1], legend_label="Fixed Price")
# New build bars
p.vbar(x=dodge('Beds', 0.25, range=p.x_range), top='New Build', source=source,
width=0.2, color=bp.Light6[2], legend_label="New Build")
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"
p.yaxis.axis_label = 'Mean Price £k'
p.xaxis.axis_label = 'Num Beds'
p.y_range.start = 0
show(p)
Here we see that the number of bedrooms is strongly correlated with mean price - it's a primary feature people look at when assessing the suitability of a home. We can also see the discount an offers-over price carries relative to a fixed price - around 10% for 1 - 3 bedroom properties. Something interesting happens for 4 and 5 bedroom properties: the offers-over price is higher than the fixed price. We also see that second-hand inventory is priced lower than new builds for 1 - 3 bedroom properties, but this trend too is mostly reversed for 4 and 5 bedroom properties. We will look into the reasons for this a little later, after we have developed some more visualisation tools.
Let's go ahead and save the dataframe we've developed - we'll re-use it in part 3 of this project.
actives_df.to_pickle('actives_df_part2.pkl')
# jupyter nbconvert --to html --no-prompt adsg_house_hunting2_part2.ipynb