This is a wrapper around sample.int() to make it easy to select random rows from a table. It currently only works for local tbls.

sample_n(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...)

sample_frac(tbl, size = 1, replace = FALSE, weight = NULL,
  .env = NULL, ...)

Arguments

tbl

tbl of data.

size

For sample_n(), the number of rows to select. For sample_frac(), the fraction of rows to select. If tbl is grouped, size applies to each group.

replace

Sample with or without replacement?

weight

Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. Weights are automatically standardised to sum to 1.

This argument is automatically quoted and later evaluated in the context of the data frame. It supports unquoting. See vignette("programming") for an introduction to these concepts.

.env

This argument is deprecated and no longer has any effect. To evaluate weight in a particular context, you can now unquote a quosure.

...

Ignored.

Examples

by_cyl <- mtcars %>% group_by(cyl)

# Sample fixed number per group
sample_n(mtcars, 10)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 2 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 3 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> 4 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 5 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> 6 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> 7 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> 8 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> 9 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> 10 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
sample_n(mtcars, 50, replace = TRUE)
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> 1 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
#> 2 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
#> 3 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> 4 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#> 5 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> 6 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> 7 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> 8 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> 9 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
#> 10 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> 11 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> 12 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
#> 13 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
#> 14 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> 15 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
#> 16 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> 17 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> 18 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> 19 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
#> 20 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> 21 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> 22 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
#> 23 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#> 24 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> 25 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
#> 26 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> 27 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
#> 28 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> 29 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
#> 30 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> 31 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> 32 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
#> 33 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> 34 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> 35 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
#> 36 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
#> 37 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
#> 38 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
#> 39 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#> 40 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
#> 41 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> 42 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> 43 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> 44 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> 45 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> 46 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
#> 47 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
#> 48 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> 49 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> 50 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
sample_n(mtcars, 10, weight = mpg)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 2 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> 3 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> 4 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> 5 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> 6 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> 7 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> 8 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> 9 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> 10 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
sample_n(by_cyl, 3)
#> # A tibble: 9 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 2 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 3 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1 #> 4 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 5 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 6 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 13.3 8 350 245 3.73 3.84 15.4 0 0 3 4 #> 9 15.2 8 276. 180 3.07 3.78 18 0 0 3 3
sample_n(by_cyl, 10, replace = TRUE)
#> # A tibble: 30 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 2 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 3 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1 #> 4 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 5 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 6 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 7 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 8 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 9 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 10 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> # … with 20 more rows
sample_n(by_cyl, 3, weight = mpg / mean(mpg))
#> # A tibble: 9 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 3 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 19.7 6 145 175 3.62 2.77 15.5 0 1 5 6 #> 6 17.8 6 168. 123 3.92 3.44 18.9 1 0 4 4 #> 7 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 8 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 #> 9 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3
# Sample fixed fraction per group
# Default is to sample all data = randomly resample rows
sample_frac(mtcars)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> 2 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> 3 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> 4 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> 5 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> 6 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> 7 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> 8 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> 9 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> 10 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> 11 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> 12 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 13 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> 14 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> 15 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> 16 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> 17 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> 18 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> 19 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 20 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> 21 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> 22 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> 23 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> 24 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> 25 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> 26 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> 27 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 29 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> 30 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 31 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> 32 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
sample_frac(mtcars, 0.1)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 #> 2 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4 #> 3 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
sample_frac(mtcars, 1.5, replace = TRUE)
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> 1 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> 2 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
#> 3 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
#> 4 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
#> 5 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
#> 6 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> 7 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> 8 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
#> 9 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> 10 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> 11 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> 12 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
#> 13 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
#> 14 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
#> 15 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> 16 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> 17 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#> 18 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
#> 19 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> 20 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
#> 21 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> 22 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
#> 23 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#> 24 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
#> 25 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#> 26 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> 27 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
#> 28 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
#> 29 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> 30 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#> 31 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> 32 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> 33 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> 34 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
#> 35 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
#> 36 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
#> 37 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
#> 38 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> 39 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
#> 40 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
#> 41 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
#> 42 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> 43 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
#> 44 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
#> 45 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> 46 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
#> 47 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
#> 48 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
sample_frac(mtcars, 0.1, weight = 1 / mpg)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 17.3 8 275.8 180 3.07 3.73 17.6 0 0 3 3 #> 2 19.7 6 145.0 175 3.62 2.77 15.5 0 1 5 6 #> 3 21.4 4 121.0 109 4.11 2.78 18.6 1 1 4 2
sample_frac(by_cyl, 0.2)
#> # A tibble: 6 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 2 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 3 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> 4 15.8 8 351 264 4.22 3.17 14.5 0 1 5 4 #> 5 15.2 8 276. 180 3.07 3.78 18 0 0 3 3 #> 6 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
sample_frac(by_cyl, 1, replace = TRUE)
#> # A tibble: 32 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 3 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 4 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 5 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 6 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1 #> 7 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 8 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 9 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 10 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> # … with 22 more rows