Tidying the CDP Data - Part 2

This post documents how to tidy CDP data that includes all companies that CDP asked to respond to its questionnaire.

Load the packages

library(pacman)
p_load(dbplyr, janitor, readxl, tidyverse)
  • path_resp is path to spreadsheet
  • sheet_resp each project year 2011 to 2018 is on a sheet named by the project year
path_resp  <- "~/Dropbox/data/cdp/investor/cdp_response_status_2010_2018.xlsx"
path_resp
## [1] "~/Dropbox/data/cdp/investor/cdp_response_status_2010_2018.xlsx"
sheet_resp  <- 2011:2018  %>%  as.character
sheet_resp
## [1] "2011" "2012" "2013" "2014" "2015" "2016" "2017" "2018"
  • create clean names for each variable
find_col <- function(path_in, sheet_in) {
  read_excel(path = path_in, sheet = sheet_in) %>% clean_names()
}

clean_col <- map2(path_resp, sheet_resp, find_col)
clean_col
## [[1]]
## # A tibble: 6,557 x 15
##    account_number_… name  company_name_co… industry_activi… isin_company
##    <chr>            <chr> <chr>            <chr>            <chr>       
##  1 44               3i G… 3i Group         Banks, Diverse … GB00B1YW4409
##  2 46               3i I… 3i Infrastructu… Banks, Diverse … JE00B1RJLF86
##  3 285              3M C… 3M Company       Consumer Durabl… US88579Y1010
##  4 30               888 … 888 Holdings     Hotels, Restaur… GI000A0F6407
##  5 22750            A & … A & J Mucklow G… Real Estate      GB0006091408
##  6 1535             A.G.… A.G. Barr Plc    Food & Beverage… GB00B6XZKY75
##  7 276              A.P.… A.P. Moller - M… Water Transport… DK0010244508
##  8 87               A2A … A2A              Electric Utilit… IT0001233417
##  9 28813            A4e … A4e              Education Servi… <NA>        
## 10 94               Aalb… Aalberts Indust… Electrical Equi… NL0000852564
## # … with 6,547 more rows, and 10 more variables: country_company <chr>,
## #   project_type <chr>, project_year_project <chr>, response_status <chr>,
## #   primary_expansion <lgl>, secondary_expansions <chr>,
## #   parent_account_company <chr>, see_another_account_company <lgl>,
## #   response_status_see_another <chr>, permission <chr>
## 
## [[2]]
## # A tibble: 7,071 x 15
##    account_number_… name  company_name_co… industry_activi… isin_company
##    <chr>            <chr> <chr>            <chr>            <chr>       
##  1 44               3i G… 3i Group         Banks, Diverse … GB00B1YW4409
##  2 46               3i I… 3i Infrastructu… Banks, Diverse … JE00B1RJLF86
##  3 37811            3L E… 3L ELECTRONIC I… Technology Hard… <NA>        
##  4 285              3M C… 3M Company       Consumer Durabl… US88579Y1010
##  5 36995            4imp… 4imprint Group … Media            GB0006640972
##  6 30               888 … 888 Holdings     Hotels, Restaur… GI000A0F6407
##  7 22750            A & … A & J Mucklow G… Real Estate      GB0006091408
##  8 1535             A.G.… A.G. Barr Plc    Food & Beverage… GB00B6XZKY75
##  9 276              A.P.… A.P. Moller - M… Water Transport… DK0010244508
## 10 87               A2A … A2A              Electric Utilit… IT0001233417
## # … with 7,061 more rows, and 10 more variables: country_company <chr>,
## #   project_type <chr>, project_year_project <chr>, response_status <chr>,
## #   primary_expansion <lgl>, secondary_expansions <chr>,
## #   parent_account_company <chr>, see_another_account_company <lgl>,
## #   response_status_see_another <chr>, permission <chr>
## 
## [[3]]
## # A tibble: 7,455 x 15
##    account_number_… name  company_name_co… industry_activi… isin_company
##    <chr>            <chr> <chr>            <chr>            <chr>       
##  1 44               3i G… 3i Group         Banks, Diverse … GB00B1YW4409
##  2 46               3i I… 3i Infrastructu… Banks, Diverse … JE00B1RJLF86
##  3 37811            3L E… 3L ELECTRONIC I… Technology Hard… <NA>        
##  4 285              3M C… 3M Company       Consumer Durabl… US88579Y1010
##  5 44801            3M I… 3M India Ltd     Trading Compani… INE470A01017
##  6 44830            3S K… 3S Korea Compan… Technology Hard… KR7060310000
##  7 36995            4imp… 4imprint Group … Media            GB0006640972
##  8 30               888 … 888 Holdings     Hotels, Restaur… GI000A0F6407
##  9 22750            A & … A & J Mucklow G… Real Estate      GB0006091408
## 10 1535             A.G.… A.G. Barr Plc    Food & Beverage… GB00B6XZKY75
## # … with 7,445 more rows, and 10 more variables: country_company <chr>,
## #   project_type <chr>, project_year_project <chr>, response_status <chr>,
## #   primary_expansion <lgl>, secondary_expansions <chr>,
## #   parent_account_company <chr>, see_another_account_company <lgl>,
## #   response_status_see_another <chr>, permission <chr>
## 
## [[4]]
## # A tibble: 7,167 x 15
##    account_number_… name  company_name_co… industry_activi… isin_company
##    <chr>            <chr> <chr>            <chr>            <chr>       
##  1 50309            3D S… 3D Systems Corp  Technology Hard… US88554D2053
##  2 44               3i G… 3i Group         Banks, Diverse … GB00B1YW4409
##  3 46               3i I… 3i Infrastructu… Banks, Diverse … JE00B1RJLF86
##  4 285              3M C… 3M Company       Consumer Durabl… US88579Y1010
##  5 44801            3M I… 3M India Ltd     Trading Compani… INE470A01017
##  6 44830            3S K… 3S Korea Compan… Technology Hard… KR7060310000
##  7 36995            4imp… 4imprint Group … Media            GB0006640972
##  8 30               888 … 888 Holdings     Hotels, Restaur… GI000A0F6407
##  9 22750            A & … A & J Mucklow G… Real Estate      GB0006091408
## 10 1535             A.G.… A.G. Barr Plc    Food & Beverage… GB00B6XZKY75
## # … with 7,157 more rows, and 10 more variables: country_company <chr>,
## #   project_type <chr>, project_year_project <chr>, response_status <chr>,
## #   primary_expansion <lgl>, secondary_expansions <chr>,
## #   parent_account_company <chr>, see_another_account_company <lgl>,
## #   response_status_see_another <chr>, permission <chr>
## 
## [[5]]
## # A tibble: 6,302 x 15
##    account_number_… name  company_name_co… industry_activi… isin_company
##    <chr>            <chr> <chr>            <chr>            <chr>       
##  1 44               3i G… 3i Group         Banks, Diverse … GB00B1YW4409
##  2 46               3i I… 3i Infrastructu… Banks, Diverse … JE00B1RJLF86
##  3 285              3M C… 3M Company       Consumer Durabl… US88579Y1010
##  4 44801            3M I… 3M India Ltd     Trading Compani… INE470A01017
##  5 54818            4Ene… 4Energy Invest … Electric Utilit… <NA>        
##  6 36995            4imp… 4imprint Group … Media            GB0006640972
##  7 30               888 … 888 Holdings     Hotels, Restaur… GI000A0F6407
##  8 22750            A & … A & J Mucklow G… Real Estate      GB0006091408
##  9 1535             A.G.… A.G. Barr Plc    Food & Beverage… GB00B6XZKY75
## 10 276              A.P.… A.P. Moller - M… Water Transport… DK0010244508
## # … with 6,292 more rows, and 10 more variables: country_company <chr>,
## #   project_type <chr>, project_year_project <chr>, response_status <chr>,
## #   primary_expansion <lgl>, secondary_expansions <chr>,
## #   parent_account_company <chr>, see_another_account_company <lgl>,
## #   response_status_see_another <chr>, permission <chr>
## 
## [[6]]
## # A tibble: 5,815 x 8
##    account_number_… company industry_activi… isin  country_company
##    <chr>            <chr>   <chr>            <chr> <chr>          
##  1 44               3i Gro… Banks, Diverse … GB00… United Kingdom 
##  2 46               3i Inf… Banks, Diverse … JE00… Channel Islands
##  3 285              3M Com… Consumer Durabl… US88… USA            
##  4 36995            4impri… Media            GB00… United Kingdom 
##  5 59323            58.com… <NA>             <NA>  China          
##  6 30               888 Ho… Hotels, Restaur… GI00… Gibraltar      
##  7 22750            A & J … Real Estate      GB00… United Kingdom 
##  8 59909            A Schu… <NA>             <NA>  USA            
##  9 1535             A.G. B… Food & Beverage… GB00… United Kingdom 
## 10 276              A.P. M… Water Transport… DK00… Denmark        
## # … with 5,805 more rows, and 3 more variables: secondary_expansions <chr>,
## #   program <chr>, response_status <chr>
## 
## [[7]]
## # A tibble: 6,028 x 7
##    account_number_… name  country_company industry_activi… isin  project
##    <chr>            <chr> <chr>           <chr>            <chr> <chr>  
##  1 44               3i G… United Kingdom  Banks, Diverse … GB00… Climat…
##  2 285              3M C… USA             Consumer Durabl… US88… Climat…
##  3 44801            3M I… India           Trading Compani… INE4… Climat…
##  4 64083            3Sbi… China           Pharmaceuticals… <NA>  Climat…
##  5 36995            4imp… United Kingdom  Media            GB00… Climat…
##  6 59323            58.c… China           Software & Serv… <NA>  Climat…
##  7 59909            A Sc… USA             Chemicals        <NA>  Climat…
##  8 1535             A.G.… United Kingdom  Food & Beverage… GB00… Climat…
##  9 276              A.P.… Denmark         Water Transport… DK00… Climat…
## 10 50108            A2 M… New Zealand     Food & Beverage… NZAT… Climat…
## # … with 6,018 more rows, and 1 more variable: response_status <chr>
## 
## [[8]]
## # A tibble: 6,083 x 15
##    organization_no… name  account_name_re… primary_activit… country_request…
##    <chr>            <chr> <chr>            <chr>            <chr>           
##  1 74021            CDP … (Sichuan) Tianq… Inorganic base … China           
##  2 4919             CDP … 1&1 Drillisch AG Telecommunicati… Germany         
##  3 44               CDP … 3i Group         Asset owners, m… United Kingdom …
##  4 285              CDP … 3M Company       Specialty chemi… United States o…
##  5 44801            CDP … 3M India Ltd     Other container… India           
##  6 64083            CDP … 3Sbio            Pharmaceuticals… China           
##  7 36995            CDP … 4imprint Group … Media & marketi… United Kingdom …
##  8 59323            CDP … 58.com Inc       Media & marketi… China           
##  9 30               CDP … 888 Holdings     Gambling - Ente… Gibraltar       
## 10 59909            CDP … A Schulman Inc   Specialty chemi… United States o…
## # … with 6,073 more rows, and 10 more variables: authority <chr>, isin <chr>,
## #   ticker_symbol_requested_organization_organization <chr>,
## #   sample_summary_invitation_invitation <chr>, request_response_status <chr>,
## #   response_status_invitation_invitation <chr>,
## #   engagement_status_invitation_invitation <chr>,
## #   parent_organization_requested_organization_organization <chr>,
## #   public_non_public_invitation_invitation <chr>,
## #   name_invitation_invitation <chr>

put the cleaned column names into variables corresponding to names in sheets

  • col_account
  • col_company
  • col_country
  • col_industry
year_in  <- 2010:2017
col_account  <- c(rep("account_number_company", 7), "organization_no_requested_organization_organization")
col_account
## [1] "account_number_company"                             
## [2] "account_number_company"                             
## [3] "account_number_company"                             
## [4] "account_number_company"                             
## [5] "account_number_company"                             
## [6] "account_number_company"                             
## [7] "account_number_company"                             
## [8] "organization_no_requested_organization_organization"
col_company  <- c(rep("company_name_company", 5), "company", rep("name",2))
col_company
## [1] "company_name_company" "company_name_company" "company_name_company"
## [4] "company_name_company" "company_name_company" "company"             
## [7] "name"                 "name"
col_country  <- c(rep("country_company", 7), "country_requested_organization_organization")
col_country
## [1] "country_company"                            
## [2] "country_company"                            
## [3] "country_company"                            
## [4] "country_company"                            
## [5] "country_company"                            
## [6] "country_company"                            
## [7] "country_company"                            
## [8] "country_requested_organization_organization"
col_industry  <- c(rep("industry_activity_group_company",5), "industry_activity_group", "industry_activity_group_company", "primary_activity_requested_organization_organization")
col_industry
## [1] "industry_activity_group_company"                     
## [2] "industry_activity_group_company"                     
## [3] "industry_activity_group_company"                     
## [4] "industry_activity_group_company"                     
## [5] "industry_activity_group_company"                     
## [6] "industry_activity_group"                             
## [7] "industry_activity_group_company"                     
## [8] "primary_activity_requested_organization_organization"
all_resp  <- function(path_resp, sheet_resp, year_in, col_account, col_company, col_country, 
    col_industry){
cleaned  <- read_excel(path = path_resp, sheet = sheet_resp)  %>% clean_names() 
cleaned  %>% 
mutate(year = year_in)  %>% 
    select(account = col_account, year, company = col_company, country = col_country,  
     industry = col_industry)  
}

l_resp  <- list(path_resp, sheet_resp, year_in, col_account, col_company, col_country, 
    col_industry)

df_responses  <- pmap_dfr(l_resp, all_resp)
df_responses 
## # A tibble: 52,478 x 5
##    account  year company        country     industry                            
##    <chr>   <int> <chr>          <chr>       <chr>                               
##  1 44       2010 3i Group       United Kin… Banks, Diverse Financials, Insurance
##  2 46       2010 3i Infrastruc… Channel Is… Banks, Diverse Financials, Insurance
##  3 285      2010 3M Company     USA         Consumer Durables, Household and Pe…
##  4 30       2010 888 Holdings   Gibraltar   Hotels, Restaurants & Leisure, and …
##  5 22750    2010 A & J Mucklow… United Kin… Real Estate                         
##  6 1535     2010 A.G. Barr Plc  United Kin… Food & Beverage Processing          
##  7 276      2010 A.P. Moller -… Denmark     Water Transportation - Water Transp…
##  8 87       2010 A2A            Italy       Electric Utilities & Independent Po…
##  9 28813    2010 A4e            United Kin… Education Services                  
## 10 94       2010 Aalberts Indu… Netherlands Electrical Equipment and Machinery  
## # … with 52,468 more rows
  • df_responses has 52478 rows
  • write to csv file
df_responses  %>% write_csv("~/Dropbox/data/cdp/investor/derived-data/df_responses.csv")
Avatar
Elizabeth Stanny
Professor of Business Administration

My research interests include climate change and political disclosures.

Related