/** * tolsac.c - source file containing creation of samples with various distributions. * * Copyright (C) 2025 https://optics-design.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * See the COPYING file for the full license text. */ #include #include #include #include #include "tolmacdef.h" // Enum for sampling methods typedef enum { SAMPLING_LHS, // Latin Hypercube Sampling SAMPLING_MCS, // Monte Carlo Sampling SAMPLING_GRID, // Grid Sampling SAMPLING_SENS // Sensitivity Analysis } SamplingMethod; // Enum for probability distribution functions typedef enum { PDF_UNIFORM = 'U', PDF_NORMAL = 'N', PDF_TRUNCNORMAL = 'T' // Truncated normal } PdfType; // Parameter data structure for improved memory locality typedef struct { byte* pdf_types; // Probability distribution types f64* nominal_values; // Nominal values f64* min_values; // Minimum values f64* max_values; // Maximum values f64* mean_values; // Mean values f64* stddev_values; // Standard deviation values } ParameterData; // Allocate memory for parameter data structure ParameterData* allocate_parameter_data(i32 param_count) { ParameterData* data = (ParameterData*)malloc(sizeof(ParameterData)); if (!data) return NULL; // Single contiguous allocation for all arrays for better cache locality void* buffer = malloc( param_count * sizeof(byte) + // pdf_types param_count * sizeof(f64) * 5 // nominal, min, max, mean, stddev ); if (!buffer) { free(data); return NULL; } // Assign pointers to appropriate sections of the buffer data->pdf_types = (byte*)buffer; data->nominal_values = (f64*)(data->pdf_types + param_count); data->min_values = data->nominal_values + param_count; data->max_values = data->min_values + param_count; data->mean_values = data->max_values + param_count; data->stddev_values = data->mean_values + param_count; return data; } // Free parameter data structure void free_parameter_data(ParameterData* data) { if (data) { // Only need to free the buffer pointed to by pdf_types (first allocation) free(data->pdf_types); free(data); } } // Normal CDF implementation (cumulative distribution function) static inline f64 normal_cdf(f64 x) { // Constants for approximation const f64 a1 = 0.254829592; const f64 a2 = -0.284496736; const f64 a3 = 1.421413741; const f64 a4 = -1.453152027; const f64 a5 = 1.061405429; const f64 p = 0.3275911; // Save the sign of x int sign = 1; if (x < 0) { sign = -1; x = -x; } // A&S formula 7.1.26 f64 t = 1.0 / (1.0 + p * x); f64 y = 1.0 - (((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * exp(-x * x)); return 0.5 * (1 + sign * y); } static inline f64 inverse_normal_cdf(f64 u) { // Use the approximation of the inverse CDF of the normal distribution const f64 a1 = -3.969683028665376e+01; const f64 a2 = 2.209460984245205e+02; const f64 a3 = -2.759285104469687e+02; const f64 a4 = 1.383577518672690e+02; const f64 a5 = -3.066479806614716e+01; const f64 a6 = 2.506628277459239e+00; const f64 b1 = -5.447609879822406e+01; const f64 b2 = 1.615858368580409e+02; const f64 b3 = -1.556989798598866e+02; const f64 b4 = 6.680131188771972e+01; const f64 b5 = -1.328068155288572e+01; const f64 c1 = -7.784894002430293e-03; const f64 c2 = -3.223964580411365e-01; const f64 c3 = -2.400758277161838e+00; const f64 c4 = -2.549732539343734e+00; const f64 c5 = 4.374664141464968e+00; const f64 c6 = 2.938163982698783e+00; const f64 d1 = 7.784695709041462e-03; const f64 d2 = 3.224671290700398e-01; const f64 d3 = 2.445134137142996e+00; const f64 d4 = 3.754408661907416e+00; const f64 p_low = 0.02425; const f64 p_high = 1.0 - p_low; f64 q, r; if (u < p_low) { q = sqrt(-2.0 * log(u)); return (((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / ((((d1 * q + d2) * q + d3) * q + d4) * q + 1.0); } else if (u <= p_high) { q = u - 0.5; r = q * q; return (((((a1 * r + a2) * r + a3) * r + a4) * r + a5) * r + a6) * q / (((((b1 * r + b2) * r + b3) * r + b4) * r + b5) * r + 1.0); } else { q = sqrt(-2.0 * log(1.0 - u)); return -(((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / ((((d1 * q + d2) * q + d3) * q + d4) * q + 1.0); } } // Generate sample based on distribution type static inline f64 generate_sample(byte pdf_type, f64 u, f64 min, f64 max, f64 mean, f64 stddev) { switch (pdf_type) { case PDF_NORMAL: // 'N' // For normal: mean + stddev * inverse_normal_cdf(u) return mean + stddev * inverse_normal_cdf(u); case PDF_TRUNCNORMAL: // 'T' // For truncated normal: { // 1. Calculate normalized min and max values f64 z_min = (min - mean) / stddev; f64 z_max = (max - mean) / stddev; // 2. Calculate CDF at min and max f64 cdf_min = normal_cdf(z_min); f64 cdf_max = normal_cdf(z_max); // 3. Scale u to be between cdf_min and cdf_max f64 u_scaled = cdf_min + u * (cdf_max - cdf_min); // 4. Apply inverse CDF to get value in correct range return mean + stddev * inverse_normal_cdf(u_scaled); } case PDF_UNIFORM: // 'U' // For uniform: min + u * (max - min) return min + u * (max - min); default: // Default to uniform if unknown return min + u * (max - min); } } // Function to display help information void display_help(void) { printf("DISCLAIMER:\n" "**USE AT YOUR OWN RISK**: The information provided by this program is for general informational purposes only. All information is provided in good faith, however I make no representation or warranty of any kind, express or implied, regarding the accuracy, adequacy, validity, reliability, availability, or completeness of the displayed optical data.\n" "I am not responsible for any errors or omissions, or for the results obtained from the use of this information. All information is provided as is, with no guarantee of completeness, accuracy, timeliness or of the results obtained from the use of this information. Users should independently verify any optical data before making decisions based on it.\n" "In no event will I be liable for any loss or damage including without limitation, indirect or consequential loss or damage, or any loss or damage whatsoever arising from loss of data or profits arising out of, or in connection with, the use of this software.\n\n" "Description:\n" "The program generates random values for parameters based on the " "given range and the probability distribution. " "It is expected that the output is used in another program " "to perform tolerancing.\n\n" "Usage:\n" ".\\tolsamp.exe Input.csv 50 [-o Output.csv]\n" "Where:\n" "Input.csv is a csv input file of a specific form.\n" "50 is the number of iterations produced (not required for the " "grid and sensitivity samplings)\n" "-o Output.csv specifies the output path and filename.\n" "If not provided, output will be named Sampled_[Input.csv] in the current directory.\n" "If you are unsure how the input should look like, " "run the program with the -e option to generate an example " "input file.\n\n" "Options:\n" "-l, -g, -m, -s are used for latin hypercube, grid, Monte Carlo " "(random), and sensitivity samplings.\n" "Use -m only if there are some issues with the LHS. " "LHS usually is the best.\n" "-o specifies the output filename (can include path).\n" "-e is used for example csv generation.\n\n" "Defaults:\n" "In the default case the program runs with -l.\n\n" "Input CSV file info:\n" "CSV file also contains possible probability density functions " "(U-Uniform, N-Normal, T-Truncated Normal) and the associated mean " "and standard deviation parameters.\n" "This program can produce symmetric normal and truncated normal distributions."); } // Function to generate example file void generate_example(void) { FILE *writefile = fopen("Example.csv", "w"); if (!writefile) { printf("Error creating example file!\n"); return; } fprintf(writefile, "Iteration Number,Parameter 1 Name,Parameter 2 Name,Parameter 3 Name,Parameter 4 Name\n"); fprintf(writefile, "Probability Density Function (U=Uniform; N=Normal/Gaussian; T=Truncated Normal),U,U,N,T\n"); fprintf(writefile, "Nominal, 0, 0, 6, 5\n"); fprintf(writefile, "Max, 1, 0.1, 10, 7\n"); fprintf(writefile, "Min, -1, -0.1, 2, 3\n"); fprintf(writefile, "Mean,0.0, 0.0, 6.0, 5.0\n"); fprintf(writefile, "Standard Deviation, 0, 0, 1.3, 1.0\n"); fclose(writefile); printf("Example.csv file generated!\n"); } // Optimized LHS interval generation that transposes for better cache locality void generate_lhs_intervals(i32 iterations, i32 param_count, i32* intervals) { // Initialize intervals for LHS in column-major order for better cache locality for (i32 i = 0; i < iterations; i++) { for (i32 j = 0; j < param_count; j++) { // Store in transposed form: intervals[i * param_count + j] = i intervals[i * param_count + j] = i; } } // Fisher-Yates shuffle for each parameter (each column) for (i32 j = 0; j < param_count; j++) { for (i32 i = iterations - 1; i > 0; i--) { i32 k = rng_int32(i + 1); // Swap intervals[i * param_count + j] and intervals[k * param_count + j] i32 temp = intervals[i * param_count + j]; intervals[i * param_count + j] = intervals[k * param_count + j]; intervals[k * param_count + j] = temp; } } } i32 main(i32 argc, char *argv[]) { // Initialize variables SamplingMethod sampling_method = SAMPLING_LHS; // Default to LHS i32 iterations = 0; char rfilename[FILENAME_MAX] = {0}; char wfilename[FILENAME_MAX] = {0}; i32 input_specified = 0; i32 output_specified = 0; i32 expect_output_file = 0; // Check for no arguments if (argc == 1) { printf("No input provided. Use -h for help.\n"); return 1; } // Process all arguments for (i32 i = 1; i < argc; i++) { if (argv[i][0] == '-') { // Process flag arguments switch (argv[i][1]) { case 'h': display_help(); return 0; case 'e': generate_example(); return 0; case 'm': sampling_method = SAMPLING_MCS; break; case 'g': sampling_method = SAMPLING_GRID; break; case 'l': sampling_method = SAMPLING_LHS; break; case 's': sampling_method = SAMPLING_SENS; break; case 'o': // Next argument should be the output file expect_output_file = 1; break; default: printf("Unknown option: -%c\n", argv[i][1]); break; } } else if (expect_output_file) { // This is the output file specified after -o strcpy(wfilename, argv[i]); output_specified = 1; expect_output_file = 0; } else if (strstr(argv[i], ".csv")) { // This is an input CSV file (we now only allow one) if (!input_specified) { strcpy(rfilename, argv[i]); input_specified = 1; } else { printf("Warning: Multiple input files specified. Using only the first one: %s\n", rfilename); } } else { // Parse iteration count i32 tmp = atoi(argv[i]); if (tmp > 0) { iterations = tmp; } } } // Error checks if (!input_specified) { printf("Error! No input CSV provided. Use -e if you want to see how it looks like.\n"); return 1; } if (expect_output_file) { printf("Error! Expected an output file after -o option.\n"); return 1; } if (iterations == 0 && sampling_method != SAMPLING_GRID && sampling_method != SAMPLING_SENS) { printf("Error! You didn't provide an integer number of iterations.\n"); return 1; } // Generate default output filename if not specified if (!output_specified) { // Generate default output filename - handle both Windows and Unix paths char *file_ptr = strrchr(rfilename, '\\'); if (!file_ptr) { file_ptr = strrchr(rfilename, '/'); } strcpy(wfilename, "Sampled_"); if (file_ptr) { strcat(wfilename, file_ptr + 1); } else { strcat(wfilename, rfilename); } } // Check if input file exists FILE *readfile = fopen(rfilename, "r"); if (!readfile) { printf("Error! Input file not found: %s\n", rfilename); return 1; } // Open output file FILE *writefile = fopen(wfilename, "w"); if (!writefile) { printf("Error! Could not create output file: %s\n", wfilename); fclose(readfile); return 1; } // Read the header line char header_line[1024] = {0}; if (!fgets(header_line, sizeof(header_line), readfile)) { printf("Error reading header line!\n"); fclose(readfile); fclose(writefile); return 1; } // Write the header line to output fputs(header_line, writefile); // Count parameters (commas) i32 param_count = 0; for (char *p = header_line; *p; p++) { if (*p == ',') param_count++; } printf("1. Total of %d parameters detected in %s. Output file is named %s.\n", param_count, rfilename, wfilename); // Initialize RNG with current time as seed rng_init(0); // Allocate parameter data structure ParameterData* params = allocate_parameter_data(param_count); if (!params) { printf("Error! Memory allocation failed for parameter data.\n"); fclose(readfile); fclose(writefile); return 1; } // Parse input file - using larger fixed size buffers to avoid potential overflows char buffer[32768]; // 32KB buffer for reading lines char *token, *err_ptr; i32 k = 0; // PDFs if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading PDF line from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->pdf_types[k++] = *token; } // Nominal k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading nominal values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->nominal_values[k++] = strtof(token, &err_ptr); } // Maxima k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading maximum values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->max_values[k++] = strtof(token, &err_ptr); } // Minima k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading minimum values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->min_values[k++] = strtof(token, &err_ptr); } // Means k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading mean values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->mean_values[k++] = strtof(token, &err_ptr); } // Standard Deviations k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading standard deviation values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->stddev_values[k++] = strtof(token, &err_ptr); } fclose(readfile); printf("2. Input CSV successfully parsed.\n"); // Write initial reference row fprintf(writefile, "1"); for (i32 j = 0; j < param_count; j++) { fprintf(writefile, ",%g", params->nominal_values[j]); } fprintf(writefile, "\n"); // Generate and write samples based on sampling method switch (sampling_method) { case SAMPLING_MCS: // Monte Carlo sampling for (i32 i = 0; i < iterations; i++) { // Write row number fprintf(writefile, "%d", i + 2); // Generate samples for each parameter for (i32 j = 0; j < param_count; j++) { f64 u = rng_uniform_f64(); f64 sample = generate_sample(params->pdf_types[j], u, params->min_values[j], params->max_values[j], params->mean_values[j], params->stddev_values[j]); // If truncated normal and out of bounds, use uniform random between min and max if (params->pdf_types[j] == PDF_TRUNCNORMAL && (sample < params->min_values[j] || sample > params->max_values[j])) { f64 u_uniform = rng_uniform_f64(); sample = generate_sample('U', u_uniform, params->min_values[j], params->max_values[j], params->mean_values[j], params->stddev_values[j]); } fprintf(writefile, ",%g", sample); } fprintf(writefile, "\n"); } break; case SAMPLING_GRID: // Handle grid sampling iterations for large parameter counts if (param_count >= 30) { // 2^30 would be over a billion iterations printf("Error: Grid sampling with %d parameters would result in too many iterations (2^%d).\n", param_count, param_count); free_parameter_data(params); fclose(writefile); return 1; } else { iterations = 1 << param_count; if (param_count > 17) { char response; printf("You have %d parameters, which corresponds to %d iterations. Are you sure this is what you want? (y/n): ", param_count, iterations); scanf("%c", &response); if (response != 'y') { printf("Quitting.\n"); free_parameter_data(params); fclose(writefile); return 1; } } } // Grid sampling for (i32 i = 0; i < iterations; i++) { // Write row number fprintf(writefile, "%d", i + 2); // Generate samples for each parameter for (i32 j = 0; j < param_count; j++) { f64 sample; // Safe bit manipulation even for large parameter counts if ((i >> j) & 1) { sample = params->max_values[j]; } else { sample = params->min_values[j]; } fprintf(writefile, ",%g", sample); } fprintf(writefile, "\n"); } break; case SAMPLING_SENS: // Calculate number of iterations needed: 2 * number of parameters iterations = 2 * param_count; // For each parameter for (i32 j = 0; j < param_count; j++) { // Parameter at minimum, others at nominal fprintf(writefile, "%d", j*2 + 2); for (i32 k = 0; k < param_count; k++) { if (k == j) { // This parameter at minimum fprintf(writefile, ",%g", params->min_values[k]); } else { // Others at nominal fprintf(writefile, ",%g", params->nominal_values[k]); } } fprintf(writefile, "\n"); // Parameter at maximum, others at nominal fprintf(writefile, "%d", j*2 + 3); for (i32 k = 0; k < param_count; k++) { if (k == j) { // This parameter at maximum fprintf(writefile, ",%g", params->max_values[k]); } else { // Others at nominal fprintf(writefile, ",%g", params->nominal_values[k]); } } fprintf(writefile, "\n"); } break; case SAMPLING_LHS: // Allocate intervals for LHS (transposed for better cache locality) i32 *intervals = (i32*)malloc(iterations * param_count * sizeof(i32)); if (!intervals) { printf("Error! Memory allocation failed for intervals.\n"); free_parameter_data(params); fclose(writefile); return 1; } // Generate LHS intervals with improved cache locality generate_lhs_intervals(iterations, param_count, intervals); // Latin Hypercube sampling for (i32 i = 0; i < iterations; i++) { // Write row number fprintf(writefile, "%d", i + 2); // Generate samples for each parameter for (i32 j = 0; j < param_count; j++) { // Get interval for this parameter (accessing in transposed form) i32 interval = intervals[i * param_count + j]; f64 interval_start = (f64)interval / iterations; f64 interval_end = (f64)(interval + 1) / iterations; f64 u = interval_start + rng_uniform_f64() * (interval_end - interval_start); f64 sample = generate_sample(params->pdf_types[j], u, params->min_values[j], params->max_values[j], params->mean_values[j], params->stddev_values[j]); // Handle truncated normal out-of-bounds samples with improved approach while (params->pdf_types[j] == PDF_TRUNCNORMAL && (sample < params->min_values[j] || sample > params->max_values[j])) { // Try a new sample within the same interval for better statistical properties i32 i_rej = rng_int32(iterations); i32 j_rej = rng_int32(param_count); i32 interval_rej = intervals[i_rej * param_count + j_rej]; f64 interval_start_rej = (f64)interval_rej / iterations; f64 interval_end_rej = (f64)(interval_rej + 1) / iterations; f64 u_rej = interval_start_rej + rng_uniform_f64() * (interval_end_rej - interval_start_rej); sample = generate_sample(params->pdf_types[j], u_rej, params->min_values[j], params->max_values[j], params->mean_values[j], params->stddev_values[j]); } fprintf(writefile, ",%g", sample); } fprintf(writefile, "\n"); } // Free LHS intervals free(intervals); break; } // Clean up free_parameter_data(params); printf("3. %d rows and %d columns have been written to the file.\nProgram finished! Check the output!\n", iterations + 1, param_count + 1); fclose(writefile); return 0; }