tidybins



library(tidybins)
suppressPackageStartupMessages(library(dplyr))

Bin Value

Binning by value is the only original binning method implemented in this package. It is inspired by the marketing use case in which accounts need to be binned by their sales: for example, creating 10 bins, where each bin represents 10% of all market sales. The first bin contains the highest-sales accounts and thus has the smallest number of accounts, whereas the last bin contains the lowest-sales accounts and thus requires the largest number of accounts to reach 10% of the market sales.


# Simulate 1,000 integer sales figures drawn from a normal distribution.
sales_data <- tibble::tibble(
  SALES = as.integer(rnorm(1000L, mean = 10000L, sd = 3000))
)

# Append a value-based bin column for SALES.
sales_data1 <- sales_data %>%
  bin_cols(SALES, bin_type = "value")
#> Warning: SALES contains negative values. Negative values are treated as 0.

sales_data1
#> # A tibble: 1,000 × 2
#>    SALES SALES_va10
#>    <int>      <int>
#>  1  8125          2
#>  2  7412          2
#>  3 10957          6
#>  4  7264          2
#>  5  9146          3
#>  6 12182          7
#>  7  9108          3
#>  8  7237          2
#>  9 12102          7
#> 10 15464         10
#> # ℹ 990 more rows

Notice that the sum (`.sum`) is approximately equal across bins; the single negative value is placed in its own bin with rank 0.

# Summarise every bin and print all summary columns without truncation.
print(bin_summary(sales_data1), width = Inf)
#> # A tibble: 11 × 14
#>    column method      n_bins .rank  .min  .mean  .max .count .uniques
#>    <chr>  <chr>        <int> <int> <int>  <dbl> <int>  <int>    <int>
#>  1 SALES  equal value     10    10 14457 15668. 19157     64       64
#>  2 SALES  equal value     10     9 13198 13757. 14451     72       67
#>  3 SALES  equal value     10     8 12397 12764. 13119     78       75
#>  4 SALES  equal value     10     7 11692 12061. 12395     82       78
#>  5 SALES  equal value     10     6 10949 11315. 11684     88       82
#>  6 SALES  equal value     10     5 10188 10582. 10939     94       89
#>  7 SALES  equal value     10     4  9351  9761. 10187    102       98
#>  8 SALES  equal value     10     3  8480  8934.  9348    111      108
#>  9 SALES  equal value     10     2  7088  7786.  8479    128      120
#> 10 SALES  equal value     10     1   896  5512.  7079    180      174
#> 11 SALES  equal value     10     0 -1935 -1935  -1935      1        1
#>    relative_value    .sum   .med   .sd width
#>             <dbl>   <int>  <dbl> <dbl> <int>
#>  1          100   1002721 15539  1006.  4700
#>  2           87.8  990522 13664.  386.  1253
#>  3           81.5  995591 12754.  218.   722
#>  4           77.0  989040 12074   188.   703
#>  5           72.2  995699 11284   233.   735
#>  6           67.5  994665 10583   219.   751
#>  7           62.3  995573  9784.  254.   836
#>  8           57.0  991721  8955   251.   868
#>  9           49.7  996641  7726   389.  1391
#> 10           35.2  992103  5790. 1195.  6183
#> 11          -12.4   -1935 -1935    NA      0