This is a wrapper around sample.int() to make it easy to select random rows from a table. It currently only works for local tbls.

sample_n(tbl, size, replace = FALSE, weight = NULL, .env = NULL)

sample_frac(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL)

Arguments

tbl

tbl of data.

size

For sample_n(), the number of rows to select. For sample_frac(), the fraction of rows to select. If tbl is grouped, size applies to each group.

replace

Sample with or without replacement?

weight

Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. Weights are automatically standardised to sum to 1.

This argument is automatically quoted and later evaluated in the context of the data frame. It supports unquoting. See vignette("programming") for an introduction to these concepts.

.env

This argument is deprecated and no longer has any effect. To evaluate weight in a particular context, you can now unquote a quosure.

Examples

by_cyl <- mtcars %>% group_by(cyl)

# Sample fixed number per group
sample_n(mtcars, 10)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> [ reached getOption("max.print") -- omitted 1 row ]
sample_n(mtcars, 50, replace = TRUE)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Lotus Europa.1 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Toyota Corona.1 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> [ reached getOption("max.print") -- omitted 41 rows ]
sample_n(mtcars, 10, weight = mpg)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> [ reached getOption("max.print") -- omitted 1 row ]
sample_n(by_cyl, 3)
#> # A tibble: 9 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 2 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 3 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 4 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 19.7 6 145 175 3.62 2.77 15.5 0 1 5 6 #> 6 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> 7 15 8 301 335 3.54 3.57 14.6 0 1 5 8 #> 8 15.8 8 351 264 4.22 3.17 14.5 0 1 5 4 #> 9 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3
sample_n(by_cyl, 10, replace = TRUE)
#> # A tibble: 30 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 2 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 3 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 4 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 5 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 6 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1 #> 7 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 8 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 9 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 10 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> # ... with 20 more rows
sample_n(by_cyl, 3, weight = mpg / mean(mpg))
#> # A tibble: 9 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 2 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 3 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 4 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> 5 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 6 19.7 6 145 175 3.62 2.77 15.5 0 1 5 6 #> 7 17.3 8 276. 180 3.07 3.73 17.6 0 0 3 3 #> 8 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 #> 9 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
# Sample fixed fraction per group
# Default is to sample all data = randomly resample rows
sample_frac(mtcars)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> [ reached getOption("max.print") -- omitted 23 rows ]
sample_frac(mtcars, 0.1)
#>                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
#> Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
#> Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
#> Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
sample_frac(mtcars, 1.5, replace = TRUE)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Datsun 710.1 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> [ reached getOption("max.print") -- omitted 39 rows ]
sample_frac(mtcars, 0.1, weight = 1 / mpg)
#>                    mpg cyl  disp  hp drat   wt  qsec vs am gear carb
#> Merc 280C         17.8   6 167.6 123 3.92 3.44 18.90  1  0    4    4
#> Merc 450SL        17.3   8 275.8 180 3.07 3.73 17.60  0  0    3    3
#> Hornet Sportabout 18.7   8 360.0 175 3.15 3.44 17.02  0  0    3    2
sample_frac(by_cyl, 0.2)
#> # A tibble: 6 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 2 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 3 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 5 15 8 301 335 3.54 3.57 14.6 0 1 5 8 #> 6 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2
sample_frac(by_cyl, 1, replace = TRUE)
#> # A tibble: 32 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 3 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 4 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 5 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 6 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 7 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 8 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 9 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 10 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> # ... with 22 more rows