/** * tolsac.c - source file containing creation of samples with various distributions. * * Copyright (C) 2025 https://optics-design.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * See the COPYING file for the full license text. */ #include #include #include #include #include "tolmacdef.h" // Enum for sampling methods typedef enum { SAMPLING_LHS, // Latin Hypercube Sampling SAMPLING_MCS, // Monte Carlo Sampling SAMPLING_GRID, // Grid Sampling SAMPLING_SENS // Sensitivity Analysis } SamplingMethod; // Enum for probability distribution functions typedef enum { PDF_UNIFORM = 'U', PDF_NORMAL = 'N', PDF_TRUNCNORMAL = 'T' // Truncated normal } PdfType; // Parameter data structure for improved memory locality typedef struct { byte* pdf_types; // Probability distribution types f64* nominal_values; // Nominal values f64* min_values; // Minimum values f64* max_values; // Maximum values f64* mean_values; // Mean values f64* stddev_values; // Standard deviation values } ParameterData; // Allocate memory for parameter data structure ParameterData* allocate_parameter_data(i32 param_count) { ParameterData* data = (ParameterData*)malloc(sizeof(ParameterData)); if (!data) return NULL; // Single contiguous allocation for all arrays for better cache locality void* buffer = malloc( param_count * sizeof(byte) + // pdf_types param_count * sizeof(f64) * 5 // nominal, min, max, mean, stddev ); if (!buffer) { free(data); return NULL; } // Assign pointers to appropriate sections of the buffer data->pdf_types = (byte*)buffer; data->nominal_values = (f64*)(data->pdf_types + param_count); data->min_values = data->nominal_values + param_count; data->max_values = data->min_values + param_count; data->mean_values = data->max_values + param_count; data->stddev_values = data->mean_values + param_count; return data; } // Free parameter data structure void free_parameter_data(ParameterData* data) { if (data) { // Only need to free the buffer pointed to by pdf_types (first allocation) free(data->pdf_types); free(data); } } // Normal CDF implementation (cumulative distribution function) static inline f64 normal_cdf(f64 x) { // Constants for approximation const f64 a1 = 0.254829592; const f64 a2 = -0.284496736; const f64 a3 = 1.421413741; const f64 a4 = -1.453152027; const f64 a5 = 1.061405429; const f64 p = 0.3275911; // Save the sign of x int sign = 1; if (x < 0) { sign = -1; x = -x; } // A&S formula 7.1.26 f64 t = 1.0 / (1.0 + p * x); f64 y = 1.0 - (((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * exp(-x * x)); return 0.5 * (1 + sign * y); } static inline f64 inverse_normal_cdf(f64 u) { // Use the approximation of the inverse CDF of the normal distribution const f64 a1 = -3.969683028665376e+01; const f64 a2 = 2.209460984245205e+02; const f64 a3 = -2.759285104469687e+02; const f64 a4 = 1.383577518672690e+02; const f64 a5 = -3.066479806614716e+01; const f64 a6 = 2.506628277459239e+00; const f64 b1 = -5.447609879822406e+01; const f64 b2 = 1.615858368580409e+02; const f64 b3 = -1.556989798598866e+02; const f64 b4 = 6.680131188771972e+01; const f64 b5 = -1.328068155288572e+01; const f64 c1 = -7.784894002430293e-03; const f64 c2 = -3.223964580411365e-01; const f64 c3 = -2.400758277161838e+00; const f64 c4 = -2.549732539343734e+00; const f64 c5 = 4.374664141464968e+00; const f64 c6 = 2.938163982698783e+00; const f64 d1 = 7.784695709041462e-03; const f64 d2 = 3.224671290700398e-01; const f64 d3 = 2.445134137142996e+00; const f64 d4 = 3.754408661907416e+00; const f64 p_low = 0.02425; const f64 p_high = 1.0 - p_low; f64 q, r; if (u < p_low) { q = sqrt(-2.0 * log(u)); return (((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / ((((d1 * q + d2) * q + d3) * q + d4) * q + 1.0); } else if (u <= p_high) { q = u - 0.5; r = q * q; return (((((a1 * r + a2) * r + a3) * r + a4) * r + a5) * r + a6) * q / (((((b1 * r + b2) * r + b3) * r + b4) * r + b5) * r + 1.0); } else { q = sqrt(-2.0 * log(1.0 - u)); return -(((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / ((((d1 * q + d2) * q + d3) * q + d4) * q + 1.0); } } // Generate sample based on distribution type static inline f64 generate_sample(byte pdf_type, f64 u, f64 min, f64 max, f64 mean, f64 stddev) { switch (pdf_type) { case PDF_NORMAL: // 'N' // For normal: mean + stddev * inverse_normal_cdf(u) return mean + stddev * inverse_normal_cdf(u); case PDF_TRUNCNORMAL: // 'T' // For truncated normal: { // 1. Calculate normalized min and max values f64 z_min = (min - mean) / stddev; f64 z_max = (max - mean) / stddev; // 2. Calculate CDF at min and max f64 cdf_min = normal_cdf(z_min); f64 cdf_max = normal_cdf(z_max); // 3. Scale u to be between cdf_min and cdf_max f64 u_scaled = cdf_min + u * (cdf_max - cdf_min); // 4. Apply inverse CDF to get value in correct range return mean + stddev * inverse_normal_cdf(u_scaled); } case PDF_UNIFORM: // 'U' // For uniform: min + u * (max - min) return min + u * (max - min); default: // Default to uniform if unknown return min + u * (max - min); } } // Function to display help information void display_help(void) { printf("Description:\n" "The program generates random values for parameters based on the " "given range and the probability distribution. " "It is expected that the output is used in another program " "to perform tolerancing.\n\n" "Usage:\n" ".\\tolsamp.exe Input.csv 50 [-o Output.csv]\n" "Where:\n" "Input.csv is a csv input file of a specific form.\n" "50 is the number of iterations produced (not required for the " "grid and sensitivity samplings)\n" "-o Output.csv specifies the output path and filename.\n" "If not provided, output will be named Sampled_[Input.csv] in the current directory.\n" "If you are unsure how the input should look like, " "run the program with the -e option to generate an example " "input file.\n\n" "Options:\n" "-l, -g, -m, -s are used for latin hypercube, grid, Monte Carlo " "(random), and sensitivity samplings.\n" "Use -m only if there are some issues with the LHS. " "LHS usually is the best.\n" "-o specifies the output filename (can include path).\n" "-e is used for example csv generation.\n\n" "Defaults:\n" "In the default case the program runs with -l.\n\n" "Input CSV file info:\n" "CSV file also contains possible probability density functions " "(U-Uniform, N-Normal, T-Truncated Normal) and the associated mean " "and standard deviation parameters.\n" "This program can produce symmetric normal and truncated normal distributions."); } // Function to generate example file void generate_example(void) { FILE *writefile = fopen("Example.csv", "w"); if (!writefile) { printf("Error creating example file!\n"); return; } fprintf(writefile, "Iteration Number,Parameter 1 Name,Parameter 2 Name,Parameter 3 Name,Parameter 4 Name\n"); fprintf(writefile, "Probability Density Function (U=Uniform; N=Normal/Gaussian; T=Truncated Normal),U,U,N,T\n"); fprintf(writefile, "Nominal, 0, 0, 6, 5\n"); fprintf(writefile, "Max, 1, 0.1, 10, 7\n"); fprintf(writefile, "Min, -1, -0.1, 2, 3\n"); fprintf(writefile, "Mean,0.0, 0.0, 6.0, 5.0\n"); fprintf(writefile, "Standard Deviation, 0, 0, 1.3, 1.0\n"); fclose(writefile); printf("Example.csv file generated!\n"); } // Optimized LHS interval generation that transposes for better cache locality void generate_lhs_intervals(i32 iterations, i32 param_count, i32* intervals) { // Initialize intervals for LHS in column-major order for better cache locality for (i32 i = 0; i < iterations; i++) { for (i32 j = 0; j < param_count; j++) { // Store in transposed form: intervals[i * param_count + j] = i intervals[i * param_count + j] = i; } } // Fisher-Yates shuffle for each parameter (each column) for (i32 j = 0; j < param_count; j++) { for (i32 i = iterations - 1; i > 0; i--) { i32 k = rng_int32(i + 1); // Swap intervals[i * param_count + j] and intervals[k * param_count + j] i32 temp = intervals[i * param_count + j]; intervals[i * param_count + j] = intervals[k * param_count + j]; intervals[k * param_count + j] = temp; } } } i32 main(i32 argc, char *argv[]) { // Initialize variables SamplingMethod sampling_method = SAMPLING_LHS; // Default to LHS i32 iterations = 0; char rfilename[FILENAME_MAX] = {0}; char wfilename[FILENAME_MAX] = {0}; i32 input_specified = 0; i32 output_specified = 0; i32 expect_output_file = 0; // Check for no arguments if (argc == 1) { printf("No input provided. Use -h for help.\n"); return 1; } // Process all arguments for (i32 i = 1; i < argc; i++) { if (argv[i][0] == '-') { // Process flag arguments switch (argv[i][1]) { case 'h': display_help(); return 0; case 'e': generate_example(); return 0; case 'm': sampling_method = SAMPLING_MCS; break; case 'g': sampling_method = SAMPLING_GRID; break; case 'l': sampling_method = SAMPLING_LHS; break; case 's': sampling_method = SAMPLING_SENS; break; case 'o': // Next argument should be the output file expect_output_file = 1; break; default: printf("Unknown option: -%c\n", argv[i][1]); break; } } else if (expect_output_file) { // This is the output file specified after -o strcpy(wfilename, argv[i]); output_specified = 1; expect_output_file = 0; } else if (strstr(argv[i], ".csv")) { // This is an input CSV file (we now only allow one) if (!input_specified) { strcpy(rfilename, argv[i]); input_specified = 1; } else { printf("Warning: Multiple input files specified. Using only the first one: %s\n", rfilename); } } else { // Parse iteration count i32 tmp = atoi(argv[i]); if (tmp > 0) { iterations = tmp; } } } // Error checks if (!input_specified) { printf("Error! No input CSV provided. Use -e if you want to see how it looks like.\n"); return 1; } if (expect_output_file) { printf("Error! Expected an output file after -o option.\n"); return 1; } if (iterations == 0 && sampling_method != SAMPLING_GRID && sampling_method != SAMPLING_SENS) { printf("Error! You didn't provide an integer number of iterations.\n"); return 1; } // Generate default output filename if not specified if (!output_specified) { // Generate default output filename - handle both Windows and Unix paths char *file_ptr = strrchr(rfilename, '\\'); if (!file_ptr) { file_ptr = strrchr(rfilename, '/'); } strcpy(wfilename, "Sampled_"); if (file_ptr) { strcat(wfilename, file_ptr + 1); } else { strcat(wfilename, rfilename); } } // Check if input file exists FILE *readfile = fopen(rfilename, "r"); if (!readfile) { printf("Error! Input file not found: %s\n", rfilename); return 1; } // Open output file FILE *writefile = fopen(wfilename, "w"); if (!writefile) { printf("Error! Could not create output file: %s\n", wfilename); fclose(readfile); return 1; } // Read the header line char header_line[1024] = {0}; if (!fgets(header_line, sizeof(header_line), readfile)) { printf("Error reading header line!\n"); fclose(readfile); fclose(writefile); return 1; } // Write the header line to output fputs(header_line, writefile); // Count parameters (commas) i32 param_count = 0; for (char *p = header_line; *p; p++) { if (*p == ',') param_count++; } printf("1. Total of %d parameters detected in %s. Output file is named %s.\n", param_count, rfilename, wfilename); // Initialize RNG with current time as seed rng_init(0); // Allocate parameter data structure ParameterData* params = allocate_parameter_data(param_count); if (!params) { printf("Error! Memory allocation failed for parameter data.\n"); fclose(readfile); fclose(writefile); return 1; } // Parse input file - using larger fixed size buffers to avoid potential overflows char buffer[32768]; // 32KB buffer for reading lines char *token, *err_ptr; i32 k = 0; // PDFs if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading PDF line from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->pdf_types[k++] = *token; } // Nominal k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading nominal values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->nominal_values[k++] = strtof(token, &err_ptr); } // Maxima k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading maximum values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->max_values[k++] = strtof(token, &err_ptr); } // Minima k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading minimum values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->min_values[k++] = strtof(token, &err_ptr); } // Means k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading mean values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->mean_values[k++] = strtof(token, &err_ptr); } // Standard Deviations k = 0; if (!fgets(buffer, sizeof(buffer), readfile)) { printf("Error reading standard deviation values from input file!\n"); free_parameter_data(params); fclose(readfile); fclose(writefile); return 1; } token = strtok(buffer, ","); while ((token = strtok(NULL, ","))) { params->stddev_values[k++] = strtof(token, &err_ptr); } fclose(readfile); printf("2. Input CSV successfully parsed.\n"); // Write initial reference row fprintf(writefile, "1"); for (i32 j = 0; j < param_count; j++) { fprintf(writefile, ",%g", params->nominal_values[j]); } fprintf(writefile, "\n"); // Generate and write samples based on sampling method switch (sampling_method) { case SAMPLING_MCS: // Monte Carlo sampling for (i32 i = 0; i < iterations; i++) { // Write row number fprintf(writefile, "%d", i + 2); // Generate samples for each parameter for (i32 j = 0; j < param_count; j++) { f64 u = rng_uniform_f64(); f64 sample = generate_sample(params->pdf_types[j], u, params->min_values[j], params->max_values[j], params->mean_values[j], params->stddev_values[j]); // If truncated normal and out of bounds, use uniform random between min and max if (params->pdf_types[j] == PDF_TRUNCNORMAL && (sample < params->min_values[j] || sample > params->max_values[j])) { f64 u_uniform = rng_uniform_f64(); sample = generate_sample('U', u_uniform, params->min_values[j], params->max_values[j], params->mean_values[j], params->stddev_values[j]); } fprintf(writefile, ",%g", sample); } fprintf(writefile, "\n"); } break; case SAMPLING_GRID: // Handle grid sampling iterations for large parameter counts if (param_count >= 30) { // 2^30 would be over a billion iterations printf("Error: Grid sampling with %d parameters would result in too many iterations (2^%d).\n", param_count, param_count); free_parameter_data(params); fclose(writefile); return 1; } else { iterations = 1 << param_count; if (param_count > 17) { char response; printf("You have %d parameters, which corresponds to %d iterations. Are you sure this is what you want? (y/n): ", param_count, iterations); scanf("%c", &response); if (response != 'y') { printf("Quitting.\n"); free_parameter_data(params); fclose(writefile); return 1; } } } // Grid sampling for (i32 i = 0; i < iterations; i++) { // Write row number fprintf(writefile, "%d", i + 2); // Generate samples for each parameter for (i32 j = 0; j < param_count; j++) { f64 sample; // Safe bit manipulation even for large parameter counts if ((i >> j) & 1) { sample = params->max_values[j]; } else { sample = params->min_values[j]; } fprintf(writefile, ",%g", sample); } fprintf(writefile, "\n"); } break; case SAMPLING_SENS: // Calculate number of iterations needed: 2 * number of parameters iterations = 2 * param_count; // For each parameter for (i32 j = 0; j < param_count; j++) { // Parameter at minimum, others at nominal fprintf(writefile, "%d", j*2 + 2); for (i32 k = 0; k < param_count; k++) { if (k == j) { // This parameter at minimum fprintf(writefile, ",%g", params->min_values[k]); } else { // Others at nominal fprintf(writefile, ",%g", params->nominal_values[k]); } } fprintf(writefile, "\n"); // Parameter at maximum, others at nominal fprintf(writefile, "%d", j*2 + 3); for (i32 k = 0; k < param_count; k++) { if (k == j) { // This parameter at maximum fprintf(writefile, ",%g", params->max_values[k]); } else { // Others at nominal fprintf(writefile, ",%g", params->nominal_values[k]); } } fprintf(writefile, "\n"); } break; case SAMPLING_LHS: // Allocate intervals for LHS (transposed for better cache locality) i32 *intervals = (i32*)malloc(iterations * param_count * sizeof(i32)); if (!intervals) { printf("Error! Memory allocation failed for intervals.\n"); free_parameter_data(params); fclose(writefile); return 1; } // Generate LHS intervals with improved cache locality generate_lhs_intervals(iterations, param_count, intervals); // Latin Hypercube sampling for (i32 i = 0; i < iterations; i++) { // Write row number fprintf(writefile, "%d", i + 2); // Generate samples for each parameter for (i32 j = 0; j < param_count; j++) { // Get interval for this parameter (accessing in transposed form) i32 interval = intervals[i * param_count + j]; f64 interval_start = (f64)interval / iterations; f64 interval_end = (f64)(interval + 1) / iterations; f64 u = interval_start + rng_uniform_f64() * (interval_end - interval_start); f64 sample = generate_sample(params->pdf_types[j], u, params->min_values[j], params->max_values[j], params->mean_values[j], params->stddev_values[j]); // Handle truncated normal out-of-bounds samples with improved approach while (params->pdf_types[j] == PDF_TRUNCNORMAL && (sample < params->min_values[j] || sample > params->max_values[j])) { // Try a new sample within the same interval for better statistical properties i32 i_rej = rng_int32(iterations); i32 j_rej = rng_int32(param_count); i32 interval_rej = intervals[i_rej * param_count + j_rej]; f64 interval_start_rej = (f64)interval_rej / iterations; f64 interval_end_rej = (f64)(interval_rej + 1) / iterations; f64 u_rej = interval_start_rej + rng_uniform_f64() * (interval_end_rej - interval_start_rej); sample = generate_sample(params->pdf_types[j], u_rej, params->min_values[j], params->max_values[j], params->mean_values[j], params->stddev_values[j]); } fprintf(writefile, ",%g", sample); } fprintf(writefile, "\n"); } // Free LHS intervals free(intervals); break; } // Clean up free_parameter_data(params); printf("3. %d rows and %d columns have been written to the file.\nProgram finished! Check the output!\n", iterations + 1, param_count + 1); fclose(writefile); return 0; }