Histogram

Dataset

First, create this sample data frame, sales_data, which contains random data about sales representatives in a company:

This data frame includes:

  • SalesRepID: A unique identifier for each sales representative.

  • Age: The age of each sales representative.

  • Sales: Total sales in dollars (normally distributed).

  • YearsExperience: Number of years of experience.

  • Region: The region where the representative is based.

Plot a histogram of the Sales variable. Set the color to blue.

Solution:

# Histogram of total sales with the bars filled in blue.
hist(
  x = sales_data$Sales,
  main = "Sales Distribution",
  xlab = "Sales",
  col = "blue"
)

Plot a histogram of Age with 15 bins to see age distribution.

Solution:

# Histogram of age, asking for 15 bins.
# A single number passed to `breaks` is only a suggestion: hist() rounds the
# break points to "pretty" values, so the number of bars drawn may differ.
hist(
  x = sales_data$Age,
  main = "Age Distribution",
  xlab = "Age",
  breaks = 15
)

Create a histogram of YearsExperience with 10 bins. Add a title and labels for the x-axis and y-axis.

Solution:

# Histogram of years of experience (about 10 bins) with an explicit title
# and explicit labels on both axes.
hist(
  x = sales_data$YearsExperience,
  main = "Years of Experience Distribution",
  xlab = "Years of Experience",
  ylab = "Frequency",
  breaks = 10
)

Plot a histogram of Sales, but set freq = FALSE to show a density rather than counts on the y-axis.

Solution:

# Histogram of sales on the density scale: with freq = FALSE the y-axis shows
# densities (total bar area is 1) instead of counts.
hist(
  x = sales_data$Sales,
  main = "Density of Sales",
  xlab = "Sales",
  freq = FALSE
)

Create a histogram of Age with custom break points at 20, 30, 40, 50, and 60.

Solution:

# Histogram of age with hand-picked bin edges: 20-30, 30-40, 40-50, 50-60.
# Note: hist() stops with an error if any value lies outside the span of the
# supplied breaks, so every Age must fall between 20 and 60.
hist(
  x = sales_data$Age,
  main = "Age Distribution with Custom Breaks",
  xlab = "Age",
  breaks = c(20, 30, 40, 50, 60)
)

Plot a histogram of Sales, setting the main title as “Distribution of Sales” and label the x-axis as “Sales Amount” and y-axis as “Frequency.”

Solution:

# Histogram of sales with a custom main title and custom axis labels.
hist(
  x = sales_data$Sales,
  ylab = "Frequency",
  xlab = "Sales Amount",
  main = "Distribution of Sales"
)

Create a histogram of Sales with freq = FALSE and overlay a density curve using lines(density(sales_data$Sales), col = "red").

Solution:

# Kernel density estimate of sales, drawn on top of the histogram below.
sales_density <- density(sales_data$Sales)

# The histogram must be on the density scale (freq = FALSE) so that it shares
# the same y-axis units as the density curve.
hist(
  x = sales_data$Sales,
  main = "Sales with Density Curve",
  xlab = "Sales",
  freq = FALSE
)

# Overlay the density curve in red.
lines(sales_density, col = "red")

Plot a histogram of YearsExperience for sales representatives who are located in the “North” region.

Solution:

library(dplyr)

# Keep only the representatives based in the North region.
north_data <- filter(sales_data, Region == "North")

# Distribution of years of experience within that region.
hist(
  x = north_data$YearsExperience,
  xlab = "Years of Experience",
  main = "Years of Experience (North Region)"
)

Create a histogram of Age, coloring the bins green, and set the border color of the bins to white.

Solution:

# Histogram of age: `col` sets the fill of the bars, `border` the outline.
hist(
  x = sales_data$Age,
  main = "Age Distribution",
  xlab = "Age",
  border = "white",
  col = "green"
)

Plot two histograms of Sales, one for representatives in the “East” region and another for those in the “West” region. Display these histograms side-by-side using par(mfrow = c(1, 2)).

Solution:

library(dplyr)

# Switch to a 1-row, 2-column layout for side-by-side plots.
# par() returns the previous settings; keep them so the layout that was
# active before this example can be restored afterwards (rather than
# assuming it was a single-panel layout).
old_par <- par(mfrow = c(1, 2))

# Plot 1: sales of representatives in the East region
sales_data2 <- sales_data |>
  filter(Region == "East")

hist(sales_data2$Sales,
     main = "Sales in East Region",
     xlab = "Sales",
     col = "blue")

# Plot 2: sales of representatives in the West region
sales_data3 <- sales_data |>
  filter(Region == "West")

hist(sales_data3$Sales,
     main = "Sales in West Region",
     xlab = "Sales",
     col = "orange")

# Restore the previous layout
par(old_par)

Plot a histogram of Sales for representatives in the “North” region, then overlay a histogram of Sales for representatives in the “South” region on the same plot. Use different colors and adjust transparency to compare the sales distribution between these two regions.

Solution:

library(dplyr)

# Subset data by region
sales_north <- sales_data |>
  filter(Region == "North")
sales_south <- sales_data |>
  filter(Region == "South")

# Semi-transparent fill colours (alpha = 0.5) so overlapping bars stay
# visible; defined once and reused for the bars and the legend.
col_north <- rgb(1, 0, 0, 0.5)
col_south <- rgb(0, 0, 1, 0.5)

# Compute one set of bin edges from all sales and use it for both
# histograms. Left to itself, each hist() call chooses its own bins, so the
# overlaid bars would not line up and could not be compared directly.
sales_breaks <- hist(sales_data$Sales, plot = FALSE)$breaks

# Size the y-axis from the tallest bar instead of a hard-coded limit, so no
# bar is cut off whatever the data look like.
counts_north <- hist(sales_north$Sales, breaks = sales_breaks,
                     plot = FALSE)$counts
counts_south <- hist(sales_south$Sales, breaks = sales_breaks,
                     plot = FALSE)$counts
y_max <- max(counts_north, counts_south)

# Plot the first histogram
hist(sales_north$Sales,
     breaks = sales_breaks,
     col = col_north,
     main = "Sales Distribution (North vs South)",
     xlab = "Sales",
     xlim = range(sales_breaks),
     ylim = c(0, y_max))

# Add the second histogram on top of the first (add = TRUE)
hist(sales_south$Sales,
     breaks = sales_breaks,
     col = col_south,
     add = TRUE)

# Add legend
legend("topright",
       legend = c("North", "South"),
       fill = c(col_north, col_south))

Note: Normally, I use the yarrr package to apply transparency to colors.

Create two histograms on the same plot to show the Age distribution of representatives with YearsExperience less than 10 and those with YearsExperience of 10 or more. Use different colors and transparency to clearly visualize the overlap.

Solution:

library(dplyr)

# Subset data by experience level
age_less_10 <- sales_data |>
  filter(YearsExperience < 10)
age_10_or_more <- sales_data |>
  filter(YearsExperience >= 10)

# Semi-transparent fill colours (alpha = 0.5) so overlapping bars stay
# visible; defined once and reused for the bars and the legend.
col_less_10 <- rgb(0, 1, 0, 0.5)
col_10_or_more <- rgb(1, 0.5, 0, 0.5)

# Compute one set of bin edges from all ages and use it for both
# histograms. Left to itself, each hist() call chooses its own bins, so the
# overlaid bars would not line up and could not be compared directly.
age_breaks <- hist(sales_data$Age, plot = FALSE)$breaks

# Size the y-axis from the tallest bar instead of a hard-coded limit, so no
# bar is cut off whatever the data look like.
counts_less_10 <- hist(age_less_10$Age, breaks = age_breaks,
                       plot = FALSE)$counts
counts_10_or_more <- hist(age_10_or_more$Age, breaks = age_breaks,
                          plot = FALSE)$counts
y_max <- max(counts_less_10, counts_10_or_more)

# Plot the first histogram
hist(age_less_10$Age,
     breaks = age_breaks,
     col = col_less_10,
     main = "Age Distribution (Experience Level)",
     xlab = "Age",
     xlim = range(age_breaks),
     ylim = c(0, y_max))

# Add the second histogram on top of the first (add = TRUE)
hist(age_10_or_more$Age,
     breaks = age_breaks,
     col = col_10_or_more,
     add = TRUE)

# Add legend
legend("topright",
       legend = c("Experience < 10", "Experience >= 10"),
       fill = c(col_less_10, col_10_or_more))

Note: Normally, I use the yarrr package to apply transparency to colors.

Plot histograms of Sales for each region (“North”, “South”, “East”, and “West”) on the same graph. Use a different color for each region and add a legend to indicate which color corresponds to each region.

Solution:

library(dplyr)

# Subset data by region
sales_north <- sales_data |>
  filter(Region == "North")
sales_south <- sales_data |>
  filter(Region == "South")
sales_east <- sales_data |>
  filter(Region == "East")
sales_west <- sales_data |>
  filter(Region == "West")

# Sales per region, in plotting order; the names double as legend labels.
region_sales <- list(
  North = sales_north$Sales,
  South = sales_south$Sales,
  East = sales_east$Sales,
  West = sales_west$Sales
)

# One semi-transparent colour (alpha = 0.4) per region, in the same order;
# defined once and reused for the bars and the legend.
region_cols <- c(
  North = rgb(1, 0, 0, 0.4),
  South = rgb(0, 1, 0, 0.4),
  East = rgb(0, 0, 1, 0.4),
  West = rgb(1, 1, 0, 0.4)
)

# Compute one set of bin edges from all sales and use it for every region.
# Left to itself, each hist() call chooses its own bins, so the overlaid
# bars would not line up and could not be compared directly.
sales_breaks <- hist(sales_data$Sales, plot = FALSE)$breaks

# Size the y-axis from the tallest bar across all regions instead of a
# hard-coded limit, so no bar is cut off whatever the data look like.
tallest_bar <- function(values) {
  max(hist(values, breaks = sales_breaks, plot = FALSE)$counts)
}
y_max <- max(vapply(region_sales, tallest_bar, numeric(1)))

# Draw the first region to set up the axes and title
hist(region_sales[[1]],
     breaks = sales_breaks,
     col = region_cols[[1]],
     main = "Sales Distribution by Region",
     xlab = "Sales",
     xlim = range(sales_breaks),
     ylim = c(0, y_max))

# Overlay the remaining regions on the same plot (add = TRUE)
for (i in seq_along(region_sales)[-1]) {
  hist(region_sales[[i]],
       breaks = sales_breaks,
       col = region_cols[[i]],
       add = TRUE)
}

# Add legend
legend("topright",
       legend = names(region_sales),
       fill = region_cols)

Note: Normally, I use the yarrr package to apply transparency to colors.