To Learn SQL or Not is the Question

https://odsc.com/apac/
create or replace view odsc_workshop.full_dataset as
(
select
c.customer_id
,c.customer_city
,c.customer_state
,oi.order_id
,oi.order_item_id
,oi.price
,oi.freight_value
,o.order_status
,o.order_delivered_customer_date
,p.product_id
,p.product_category_name
,s.seller_id
,s.seller_city
,s.seller_state
,op.payment_sequential
,op.payment_type
,op.payment_installments
,op.payment_value
,o.order_purchase_timestamp
from
odsc_workshop.customers c
left join
odsc_workshop.orders o
on c.customer_id = o.customer_id
left join
odsc_workshop.order_payments op
on o.order_id = op.order_id
left join
odsc_workshop.order_items oi
on o.order_id = oi.order_id
left join
odsc_workshop.products p
on oi.product_id = p.product_id
left join
odsc_workshop.sellers s
on oi.seller_id = s.seller_id
)
select 
customer_id
,total_items_bought
,monetary_value
,date_part('day',last_purchase_date - first_purchase_date)
as customer_tenure_in_days
,date_part('day',current_date - last_purchase_date) as days_since_last_purchase
from
(
select
customer_id
, min(order_purchase_timestamp) as first_purchase_date
, max(order_purchase_timestamp) as last_purchase_date
, count(distinct order_item_id) as total_items_bought
, sum(price) as monetary_value
from odsc_workshop.full_dataset
group by 1
) lrfm
create or replace view odsc_workshop.lrfm_data as
(
select
customer_id
,total_items_bought
,monetary_value
, date_part('day',last_purchase_date - first_purchase_date)
as customer_tenure_in_days
,date_part('day',current_date - last_purchase_date) as
days_since_last_purchase
from
(
select
customer_id
, min(order_purchase_timestamp) as first_purchase_date
, max(order_purchase_timestamp) as last_purchase_date
, count(distinct order_item_id) as total_items_bought
, sum(price) as monetary_value
from odsc_workshop.full_dataset
group by 1
) lrfm
)
###Import Libraries
import psycopg2
import pandas as pd
###Connect to the database
conn = psycopg2.connect(user="postgres",
password="XXXXXXXXX",
host="127.0.0.1",
port="5432",
database="odsc_sql")
###Open the cursor
cur = conn.cursor()
###Select your Dataset
query = "select * from odsc_workshop.lrfm_data"
###Execute your query
cur.execute(query)
###Put it in a dataframe
df_purchases = pd.read_sql_query(query,conn)
###Check your data
df_purchases.head()
###Close the cursor and the connection
cur.close()
conn.close()

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
ODSC - Open Data Science

ODSC - Open Data Science

94K Followers

Our passion is bringing thousands of the best and brightest data scientists together under one roof for an incredible learning and networking experience.