#!/usr/bin/env Rscript
# Plot a histogram of one CSV column and report the share of samples that
# fall inside an optional [min, max] range.
#
# Usage: Rscript hist.r filename.csv column_number [min] [max] [bins]
#   filename.csv   CSV file with a header row
#   column_number  1-based index of the column to plot
#   min, max       optional range bounds; a bound that parses to NA is
#                  treated as "not given" and the data min/max is used
#   bins           optional suggested number of bins for hist() (default 20)

usage_text <-
  "Usage: Rscript hist.r filename.csv column_number [min] [max] [bins]\n"

# Return the numeric value of the positional argument at `position`, or
# `default` when fewer than `position` arguments were supplied.
optional_number <- function(args, position, default = NA_real_) {
  if (length(args) >= position) as.numeric(args[position]) else default
}

# Open an on-screen graphics device. `windows()` only exists on Windows
# builds of R, so every other platform uses the X11 device instead.
open_plot_window <- function(width, height) {
  if (.Platform$OS.type == "windows") {
    grDevices::windows(width = width, height = height)
  } else {
    grDevices::x11(width = width, height = height)
  }
}

# Parse arguments ----
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("a filename and a column number are required.\n", usage_text,
       call. = FALSE)
}

filename <- args[1]
if (!file.exists(filename)) {
  stop("file not found: ", filename, call. = FALSE)
}

# NA from a non-numeric column argument is reported by the check below, so
# the coercion warning would only be noise.
col_num <- suppressWarnings(as.numeric(args[2]))
if (is.na(col_num) || col_num < 1 || col_num %% 1 != 0) {
  stop("column_number must be a positive integer, got '", args[2], "'.\n",
       usage_text, call. = FALSE)
}

# NOTE(review): read.csv(header = TRUE) already consumes the header line, so
# starting at row 2 skips the first data record. Kept as in the original in
# case it is deliberate (e.g. a units row) -- confirm and set to 1 if not.
start_row <- 2

# Optional min and max values (positions 3 and 4)
user_min <- optional_number(args, 3)
user_max <- optional_number(args, 4)

# Number of bins at the end (position 5); defaults to 20
num_bins <- optional_number(args, 5, default = 20)
if (is.na(num_bins) || num_bins < 1) {
  stop("bins must be a positive number.\n", usage_text, call. = FALSE)
}

# Read and clean the data ----
data <- read.csv(filename, header = TRUE)
if (col_num > ncol(data)) {
  stop("column_number ", col_num, " is out of range; the file has ",
       ncol(data), " column(s).", call. = FALSE)
}

# `start_row:nrow(data)` would count downwards (e.g. 2:1) when the file has
# fewer than `start_row` rows, so fall back to an empty selection.
rows <- if (nrow(data) >= start_row) start_row:nrow(data) else integer(0)
plot_data <- data[rows, col_num]

# Convert to numeric (via character so factor columns yield their labels,
# not their internal codes) and drop everything that is not a number.
plot_data <- as.numeric(as.character(plot_data))
plot_data <- plot_data[!is.na(plot_data)]
if (length(plot_data) == 0) {
  stop("no numeric values found in column ", col_num, " of ", filename,
       call. = FALSE)
}

# Range statistics ----
data_min <- min(plot_data)
data_max <- max(plot_data)
range_min <- if (is.na(user_min)) data_min else user_min
range_max <- if (is.na(user_max)) data_max else user_max

# Percentage of samples within the (inclusive) range
count_in_range <- sum(plot_data >= range_min & plot_data <= range_max)
total_count <- length(plot_data)
percent_in_range <- (count_in_range / total_count) * 100

cat(sprintf("%.2f%% of samples are within the range [%.2f, %.2f]\n",
            percent_in_range, range_min, range_max))

# Plot ----
open_plot_window(width = 8, height = 6)

hist(plot_data,
     breaks = num_bins,
     main = paste("Histogram of Column", col_num),
     xlab = "Value",
     col = "lightblue",
     border = "black")

# Mark the user-supplied bounds, if any, with dashed red lines
if (!is.na(user_min)) {
  abline(v = user_min, col = "red", lwd = 2, lty = 2)
}
if (!is.na(user_max)) {
  abline(v = user_max, col = "red", lwd = 2, lty = 2)
}

# Summary statistics, drawn near the top-left corner of the plot region
stats_text <- paste0(
  "n = ", length(plot_data), "\n",
  "min = ", sprintf("%.2f", data_min), "\n",
  "max = ", sprintf("%.2f", data_max), "\n",
  "mean = ", sprintf("%.2f", mean(plot_data)), "\n",
  "median = ", sprintf("%.2f", median(plot_data)), "\n",
  "sd = ", sprintf("%.2f", sd(plot_data)), "\n",
  sprintf("%.2f%% in [%.2f, %.2f]", percent_in_range, range_min, range_max)
)
usr <- par("usr")  # plot region extents: c(x1, x2, y1, y2)
text_x <- usr[1] + 0.05 * (usr[2] - usr[1])
text_y <- usr[4] - 0.1 * (usr[4] - usr[3])
text(text_x, text_y, stats_text, adj = c(0, 1))

mtext("Close this window using the X button when done",
      side = 1, line = 4, col = "red")

cat("Histogram displayed. Close the window manually when done viewing.\n")
cat(usage_text)

# Keep the script alive until the window is closed ----
# Rscript exits (and the window disappears) as soon as the script ends, so
# poll until no graphics device is left open. Sys.sleep() lets the device
# process window events, including the close button.
while (!is.null(dev.list())) {
  Sys.sleep(0.5)
}