3 min read

Business Organization Team Members Profile Counts

# connect to BigQuery
con <- dbConnect(
  bigrquery::bigquery(),
  project = "buffer-data"
)

# define sql query
sql <- "
  -- premium and up customers 
  -- include the org id that they own
  -- approximately 12k customers
  with paying_users as (
    select distinct
      u.id as user_id
      , o.id as org_id
      , u.billing_plan
      , cast(u.billing_plan_number as float64) as plan_number
      , u.is_on_new_publish
    from dbt_buffer.publish_users u
    inner join dbt_buffer.publish_organizations o
      on u.id = o.owner_id
    where cast(u.billing_plan_number as float64) > 6
  ),
  
  -- approximately 10M permissions 
  -- include the user id of the owner
  -- include number of profiles in the org
  orgs as (
    select distinct
      pp.*
      , o.owner_id
      , count(distinct p.id) as profiles
    from dbt_buffer.publish_permissions pp
    inner join dbt_buffer.publish_organizations o
      on pp.organization_id = o.id
    left join dbt_buffer.publish_profiles p
      on p.organization_id = o.id
      and (not p.is_deleted or p.is_deleted is null)
      and (not p.is_disabled or p.is_disabled is null)
    group by 1,2,3,4,5,6,7,8,9,10,11
  ),
  
  team_members as (
  
    -- all permissions for orgs owned by business customers
    -- number of profiles belonging to org of team member
     select distinct
       pu.user_id
       , pu.is_on_new_publish
       , pu.org_id as owner_org_id
       , pu.billing_plan
       , pu.plan_number
       , o.user_id as team_member_user_id
       , o.owner_id 
       , o.organization_id as team_member_ord_id
       , o.role
       , o.role_type
       , o.profiles
     from paying_users pu
     left join orgs o
       on pu.org_id = o.organization_id
  )
  
  select
    t.team_member_user_id
    , o.id as org_id
    , o.name as org_name
    , count(distinct p.id) as profiles
  from team_members t
  left join dbt_buffer.publish_organizations o
    on t.team_member_user_id = o.owner_id
  left join dbt_buffer.publish_profiles p
      on p.organization_id = o.id
      and (not p.is_deleted or p.is_deleted is null)
      and (not p.is_disabled or p.is_disabled is null)
  where t.team_member_user_id != t.user_id
  group by 1,2,3
"
  
# query bigquery
orgs <- dbGetQuery(con, sql)

Now calculate the proportion of these team members that have 0 profiles.

# get proportion that have 0 profiles
orgs %>% 
  mutate(has_no_profiles = profiles == 0) %>% 
  group_by(has_no_profiles) %>% 
  summarise(users = n_distinct(team_member_user_id)) %>% 
  mutate(prop = users / sum(users))
## # A tibble: 2 x 3
##   has_no_profiles users  prop
##   <lgl>           <int> <dbl>
## 1 FALSE            4071 0.121
## 2 TRUE            29610 0.879
# see how many own n profiles
orgs %>% 
  group_by(profiles) %>% 
  summarise(users = n_distinct(team_member_user_id)) %>% 
  mutate(prop = percent(users / sum(users)))
## # A tibble: 43 x 3
##    profiles users prop 
##       <int> <int> <chr>
##  1        0 29610 88%  
##  2        1  2230 7%   
##  3        2   830 2%   
##  4        3   621 2%   
##  5        4   117 0%   
##  6        5    50 0%   
##  7        6    44 0%   
##  8        7    36 0%   
##  9        8    29 0%   
## 10        9    18 0%   
## # … with 33 more rows