summary | refs | log | tree | commit | diff
path: root/tests/hist.r
blob: ee20d09ed131009751c5fd2a82d1f8643a364c3b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env Rscript

# Plot a histogram of one CSV column in an on-screen window and report the
# share of samples that fall inside an optional [min, max] range.
#
# Usage: Rscript hist.r filename.csv column_number [min] [max] [bins]
#
#   filename.csv   CSV file with a header line
#   column_number  1-based index of the column to plot
#   min, max       optional range bounds; each defaults to the data extreme
#   bins           optional suggested number of histogram bins (default 20)
#
# The script blocks until the plot window is closed.

usage_text <-
  "Usage: Rscript hist.r filename.csv column_number [min] [max] [bins]\n"

# Print an error plus the usage line to stderr and exit with status 1.
fail <- function(...) {
  cat("Error: ", ..., "\n", sep = "", file = stderr())
  cat(usage_text, file = stderr())
  quit(save = "no", status = 1)
}

# Return the numeric value of args[position], or `default` when the argument
# was not supplied. A supplied but non-numeric value becomes NA (with R's
# usual coercion warning).
optional_number <- function(args, position, default) {
  if (length(args) >= position) as.numeric(args[position]) else default
}

# Parse arguments (filename, column number, min, max, bins)
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  fail("expected at least a filename and a column number")
}

filename <- args[1]
if (!file.exists(filename)) {
  fail("file not found: ", filename)
}

# NOTE(review): with header = TRUE, data-frame row 1 is already the first
# line after the header, so starting at row 2 skips one more line. This looks
# deliberate (e.g. a units line under the header) and is kept as is - confirm.
start_row <- 2
col_num <- as.numeric(args[2])

# Optional min and max values (positions 3 and 4); NA means "not supplied"
user_min <- optional_number(args, 3, NA_real_)
user_max <- optional_number(args, 4, NA_real_)

# Number of bins (position 5); hist() treats a single number as a suggestion
num_bins <- optional_number(args, 5, 20)
if (is.na(num_bins) || num_bins < 1) {
  fail("bins must be a positive number")
}

# Read the CSV file (assuming it has a header)
data <- read.csv(filename, header = TRUE)

if (is.na(col_num) || col_num < 1 || col_num > ncol(data)) {
  fail("column_number must be between 1 and ", ncol(data))
}

# Select rows start_row..nrow(data). A bare `start_row:nrow(data)` would count
# backwards when the file has fewer than start_row rows and pull in the
# skipped row, so build the index explicitly.
rows <- if (nrow(data) >= start_row) {
  seq(start_row, nrow(data))
} else {
  integer(0)
}
plot_data <- data[rows, col_num]

# Convert to numeric (via character so factor columns yield their labels, not
# their internal codes) and drop anything that is not a number
plot_data <- as.numeric(as.character(plot_data))
plot_data <- plot_data[!is.na(plot_data)]

if (length(plot_data) == 0) {
  fail("no numeric values found in column ", col_num, " of ", filename)
}

# Determine range bounds: user-supplied values win over the data extremes
data_min <- min(plot_data)
data_max <- max(plot_data)

range_min <- if (is.na(user_min)) data_min else user_min
range_max <- if (is.na(user_max)) data_max else user_max

# Calculate percentage of samples within the (inclusive) range
count_in_range <- sum(plot_data >= range_min & plot_data <= range_max)
total_count <- length(plot_data)
percent_in_range <- (count_in_range / total_count) * 100

# Output to command line
cat(sprintf("%.2f%% of samples are within the range [%.2f, %.2f]\n",
            percent_in_range, range_min, range_max))

# Open an on-screen device. windows() only exists on Windows builds of R, so
# fall back to x11() elsewhere. The device is opened explicitly because the
# default device under Rscript is a PDF file, not a window.
if (.Platform$OS.type == "windows") {
  grDevices::windows(width = 8, height = 6)
} else {
  grDevices::x11(width = 8, height = 6)
}
plot_device <- dev.cur()

# Create the histogram
hist(plot_data,
     breaks = num_bins,
     main = paste("Histogram of Column", col_num),
     xlab = "Value",
     col = "lightblue",
     border = "black")

# Add vertical lines for min and max if provided by user
if (!is.na(user_min)) {
  abline(v = user_min, col = "red", lwd = 2, lty = 2)
}
if (!is.na(user_max)) {
  abline(v = user_max, col = "red", lwd = 2, lty = 2)
}

# Add basic statistics including standard deviation
stats_text <- paste0("n = ", length(plot_data), "\n",
                     "min = ", sprintf("%.2f", data_min), "\n",
                     "max = ", sprintf("%.2f", data_max), "\n",
                     "mean = ", sprintf("%.2f", mean(plot_data)), "\n",
                     "median = ", sprintf("%.2f", median(plot_data)), "\n",
                     "sd = ", sprintf("%.2f", sd(plot_data)), "\n",
                     sprintf("%.2f%% in [%.2f, %.2f]",
                             percent_in_range, range_min, range_max))

# Anchor the statistics near the top-left corner of the plot region:
# 5% in from the left edge and 10% down from the top edge.
usr <- par("usr")
text_x <- usr[1] + 0.05 * (usr[2] - usr[1])
text_y <- usr[4] - 0.1 * (usr[4] - usr[3])
text(text_x, text_y, stats_text, adj = c(0, 1))

# Add text to instruct user
mtext("Close this window using the X button when done",
      side = 1, line = 4, col = "red")

# Tell user what to do
cat("Histogram displayed. Close the window manually when done viewing.\n")
cat(usage_text)

# Keep the script running until the window is closed. Sys.sleep() lets R
# service window events, so closing the window removes the device from
# dev.list() and ends the loop.
while (plot_device %in% dev.list()) {
  Sys.sleep(0.2)
}
Back to https://optics-design.com