# connect to BigQuery
con <- dbConnect(
bigrquery::bigquery(),
project = "buffer-data"
)
# define sql query
sql <- "
-- premium and up customers
-- include the org id that they own
-- approximately 12k customers
with paying_users as (
select distinct
u.id as user_id
, o.id as org_id
, u.billing_plan
, cast(u.billing_plan_number as float64) as plan_number
, u.is_on_new_publish
from dbt_buffer.publish_users u
inner join dbt_buffer.publish_organizations o
on u.id = o.owner_id
where cast(u.billing_plan_number as float64) > 6
),
-- approximately 10M permissions
-- include the user id of the owner
-- include number of profiles in the org
orgs as (
select distinct
pp.*
, o.owner_id
, count(distinct p.id) as profiles
from dbt_buffer.publish_permissions pp
inner join dbt_buffer.publish_organizations o
on pp.organization_id = o.id
left join dbt_buffer.publish_profiles p
on p.organization_id = o.id
and (not p.is_deleted or p.is_deleted is null)
and (not p.is_disabled or p.is_disabled is null)
group by 1,2,3,4,5,6,7,8,9,10,11
),
team_members as (
-- all permissions for orgs owned by business customers
-- number of profiles belonging to org of team member
select distinct
pu.user_id
, pu.is_on_new_publish
, pu.org_id as owner_org_id
, pu.billing_plan
, pu.plan_number
, o.user_id as team_member_user_id
, o.owner_id
, o.organization_id as team_member_ord_id
, o.role
, o.role_type
, o.profiles
from paying_users pu
left join orgs o
on pu.org_id = o.organization_id
)
select
t.team_member_user_id
, o.id as org_id
, o.name as org_name
, count(distinct p.id) as profiles
from team_members t
left join dbt_buffer.publish_organizations o
on t.team_member_user_id = o.owner_id
left join dbt_buffer.publish_profiles p
on p.organization_id = o.id
and (not p.is_deleted or p.is_deleted is null)
and (not p.is_disabled or p.is_disabled is null)
where t.team_member_user_id != t.user_id
group by 1,2,3
"
# query bigquery
orgs <- dbGetQuery(con, sql)
Now calculate the proportion of these team members that have 0 profiles.
# get proportion that have 0 profiles
orgs %>%
mutate(has_no_profiles = profiles == 0) %>%
group_by(has_no_profiles) %>%
summarise(users = n_distinct(team_member_user_id)) %>%
mutate(prop = users / sum(users))
## # A tibble: 2 x 3
## has_no_profiles users prop
## <lgl> <int> <dbl>
## 1 FALSE 4071 0.121
## 2 TRUE 29610 0.879
# see how many own n profiles
orgs %>%
group_by(profiles) %>%
summarise(users = n_distinct(team_member_user_id)) %>%
mutate(prop = percent(users / sum(users)))
## # A tibble: 43 x 3
## profiles users prop
## <int> <int> <chr>
## 1 0 29610 88%
## 2 1 2230 7%
## 3 2 830 2%
## 4 3 621 2%
## 5 4 117 0%
## 6 5 50 0%
## 7 6 44 0%
## 8 7 36 0%
## 9 8 29 0%
## 10 9 18 0%
## # … with 33 more rows