From 0a64ff62830495ea21bdafe33b6166c3955d35fd Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 25 Apr 2025 21:13:44 +0200 Subject: First Commit --- src/tolsac.c | 691 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 691 insertions(+) create mode 100644 src/tolsac.c (limited to 'src/tolsac.c') diff --git a/src/tolsac.c b/src/tolsac.c new file mode 100644 index 0000000..4a3a0bc --- /dev/null +++ b/src/tolsac.c @@ -0,0 +1,691 @@ +/** + * tolsac.c - source file containing creation of samples with various distributions. + * + * Copyright (C) 2025 https://optics-design.com + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * See the COPYING file for the full license text. + */ +#include +#include +#include +#include +#include "tolmacdef.h" + +// Enum for sampling methods +typedef enum { + SAMPLING_LHS, // Latin Hypercube Sampling + SAMPLING_MCS, // Monte Carlo Sampling + SAMPLING_GRID, // Grid Sampling + SAMPLING_SENS // Sensitivity Analysis +} SamplingMethod; + +// Enum for probability distribution functions +typedef enum { + PDF_UNIFORM = 'U', + PDF_NORMAL = 'N', + PDF_TRUNCNORMAL = 'T' // Truncated normal +} PdfType; + +// Parameter data structure for improved memory locality +typedef struct { + byte* pdf_types; // Probability distribution types + f64* nominal_values; // Nominal values + f64* min_values; // Minimum values + f64* max_values; // Maximum values + f64* mean_values; // Mean values + f64* stddev_values; // Standard deviation values +} ParameterData; + +// Allocate memory for parameter data structure +ParameterData* allocate_parameter_data(i32 param_count) { + ParameterData* data = (ParameterData*)malloc(sizeof(ParameterData)); + if (!data) return NULL; + + // Single contiguous allocation for all arrays for better cache locality + void* buffer = malloc( + param_count * sizeof(byte) + // pdf_types + param_count * sizeof(f64) * 5 // nominal, min, max, mean, stddev + ); + + if (!buffer) { + free(data); + return NULL; + } + + // Assign pointers to appropriate sections of the buffer + data->pdf_types = (byte*)buffer; + data->nominal_values = (f64*)(data->pdf_types + param_count); + data->min_values = data->nominal_values + param_count; + data->max_values = data->min_values + param_count; + data->mean_values = data->max_values + param_count; + data->stddev_values = data->mean_values + param_count; + + return data; +} + +// Free parameter data structure +void free_parameter_data(ParameterData* data) { + if (data) { + // Only need to free the buffer pointed to by pdf_types (first allocation) + free(data->pdf_types); + free(data); + } +} + +// Normal CDF implementation (cumulative distribution function) +static inline f64 normal_cdf(f64 x) { + // Constants for approximation + const f64 a1 = 0.254829592; + const f64 a2 = -0.284496736; + const f64 a3 = 1.421413741; + const f64 a4 = -1.453152027; + const f64 a5 = 1.061405429; + const f64 p = 0.3275911; + + // Save the sign of x + int sign = 1; + if (x < 0) { + sign = -1; + x = -x; + } + + // A&S formula 7.1.26 + f64 t = 1.0 / (1.0 + p * x); + f64 y = 1.0 - (((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * exp(-x * x)); + + return 0.5 * (1 + sign * y); +} + +static inline f64 inverse_normal_cdf(f64 u) { + // Use the approximation of the inverse CDF of the normal distribution + const f64 a1 = -3.969683028665376e+01; + const f64 a2 = 2.209460984245205e+02; + const f64 a3 = -2.759285104469687e+02; + const f64 a4 = 1.383577518672690e+02; + const f64 a5 = -3.066479806614716e+01; + const f64 a6 = 2.506628277459239e+00; + + const f64 b1 = -5.447609879822406e+01; + const f64 b2 = 1.615858368580409e+02; + const f64 b3 = -1.556989798598866e+02; + const f64 b4 = 6.680131188771972e+01; + const f64 b5 = -1.328068155288572e+01; + + const f64 c1 = -7.784894002430293e-03; + const f64 c2 = -3.223964580411365e-01; + const f64 c3 = -2.400758277161838e+00; + const f64 c4 = -2.549732539343734e+00; + const f64 c5 = 4.374664141464968e+00; + const f64 c6 = 2.938163982698783e+00; + + const f64 d1 = 7.784695709041462e-03; + const f64 d2 = 3.224671290700398e-01; + const f64 d3 = 2.445134137142996e+00; + const f64 d4 = 3.754408661907416e+00; + + const f64 p_low = 0.02425; + const f64 p_high = 1.0 - p_low; + + f64 q, r; + + if (u < p_low) { + q = sqrt(-2.0 * log(u)); + return (((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / + ((((d1 * q + d2) * q + d3) * q + d4) * q + 1.0); + } else if (u <= p_high) { + q = u - 0.5; + r = q * q; + return (((((a1 * r + a2) * r + a3) * r + a4) * r + a5) * r + a6) * q / + (((((b1 * r + b2) * r + b3) * r + b4) * r + b5) * r + 1.0); + } else { + q = sqrt(-2.0 * log(1.0 - u)); + return -(((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / + ((((d1 * q + d2) * q + d3) * q + d4) * q + 1.0); + } +} + +// Generate sample based on distribution type +static inline f64 generate_sample(byte pdf_type, f64 u, f64 min, f64 max, f64 mean, f64 stddev) { + switch (pdf_type) { + case PDF_NORMAL: // 'N' + // For normal: mean + stddev * inverse_normal_cdf(u) + return mean + stddev * inverse_normal_cdf(u); + + case PDF_TRUNCNORMAL: // 'T' + // For truncated normal: + { + // 1. Calculate normalized min and max values + f64 z_min = (min - mean) / stddev; + f64 z_max = (max - mean) / stddev; + + // 2. Calculate CDF at min and max + f64 cdf_min = normal_cdf(z_min); + f64 cdf_max = normal_cdf(z_max); + + // 3. Scale u to be between cdf_min and cdf_max + f64 u_scaled = cdf_min + u * (cdf_max - cdf_min); + + // 4. Apply inverse CDF to get value in correct range + return mean + stddev * inverse_normal_cdf(u_scaled); + } + + case PDF_UNIFORM: // 'U' + // For uniform: min + u * (max - min) + return min + u * (max - min); + + default: + // Default to uniform if unknown + return min + u * (max - min); + } +} + +// Function to display help information +void display_help(void) { + printf("Description:\n" + "The program generates random values for parameters based on the " + "given range and the probability distribution. " + "It is expected that the output is used in another program " + "to perform tolerancing.\n\n" + "Usage:\n" + ".\\tolsamp.exe Input.csv 50 [-o Output.csv]\n" + "Where:\n" + "Input.csv is a csv input file of a specific form.\n" + "50 is the number of iterations produced (not required for the " + "grid and sensitivity samplings)\n" + "-o Output.csv specifies the output path and filename.\n" + "If not provided, output will be named Sampled_[Input.csv] in the current directory.\n" + "If you are unsure how the input should look like, " + "run the program with the -e option to generate an example " + "input file.\n\n" + "Options:\n" + "-l, -g, -m, -s are used for latin hypercube, grid, Monte Carlo " + "(random), and sensitivity samplings.\n" + "Use -m only if there are some issues with the LHS. " + "LHS usually is the best.\n" + "-o specifies the output filename (can include path).\n" + "-e is used for example csv generation.\n\n" + "Defaults:\n" + "In the default case the program runs with -l.\n\n" + "Input CSV file info:\n" + "CSV file also contains possible probability density functions " + "(U-Uniform, N-Normal, T-Truncated Normal) and the associated mean " + "and standard deviation parameters.\n" + "This program can produce symmetric normal and truncated normal distributions."); +} + +// Function to generate example file +void generate_example(void) { + FILE *writefile = fopen("Example.csv", "w"); + if (!writefile) { + printf("Error creating example file!\n"); + return; + } + + fprintf(writefile, "Iteration Number,Parameter 1 Name,Parameter 2 Name,Parameter 3 Name,Parameter 4 Name\n"); + fprintf(writefile, "Probability Density Function (U=Uniform; N=Normal/Gaussian; T=Truncated Normal),U,U,N,T\n"); + fprintf(writefile, "Nominal, 0, 0, 6, 5\n"); + fprintf(writefile, "Max, 1, 0.1, 10, 7\n"); + fprintf(writefile, "Min, -1, -0.1, 2, 3\n"); + fprintf(writefile, "Mean,0.0, 0.0, 6.0, 5.0\n"); + fprintf(writefile, "Standard Deviation, 0, 0, 1.3, 1.0\n"); + fclose(writefile); + + printf("Example.csv file generated!\n"); +} + +// Optimized LHS interval generation that transposes for better cache locality +void generate_lhs_intervals(i32 iterations, i32 param_count, i32* intervals) { + // Initialize intervals for LHS in column-major order for better cache locality + for (i32 i = 0; i < iterations; i++) { + for (i32 j = 0; j < param_count; j++) { + // Store in transposed form: intervals[i * param_count + j] = i + intervals[i * param_count + j] = i; + } + } + + // Fisher-Yates shuffle for each parameter (each column) + for (i32 j = 0; j < param_count; j++) { + for (i32 i = iterations - 1; i > 0; i--) { + i32 k = rng_int32(i + 1); + // Swap intervals[i * param_count + j] and intervals[k * param_count + j] + i32 temp = intervals[i * param_count + j]; + intervals[i * param_count + j] = intervals[k * param_count + j]; + intervals[k * param_count + j] = temp; + } + } +} + +i32 main(i32 argc, char *argv[]) { + // Initialize variables + SamplingMethod sampling_method = SAMPLING_LHS; // Default to LHS + i32 iterations = 0; + char rfilename[FILENAME_MAX] = {0}; + char wfilename[FILENAME_MAX] = {0}; + i32 input_specified = 0; + i32 output_specified = 0; + i32 expect_output_file = 0; + + // Check for no arguments + if (argc == 1) { + printf("No input provided. Use -h for help.\n"); + return 1; + } + + // Process all arguments + for (i32 i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + // Process flag arguments + switch (argv[i][1]) { + case 'h': + display_help(); + return 0; + case 'e': + generate_example(); + return 0; + case 'm': + sampling_method = SAMPLING_MCS; + break; + case 'g': + sampling_method = SAMPLING_GRID; + break; + case 'l': + sampling_method = SAMPLING_LHS; + break; + case 's': + sampling_method = SAMPLING_SENS; + break; + case 'o': + // Next argument should be the output file + expect_output_file = 1; + break; + default: + printf("Unknown option: -%c\n", argv[i][1]); + break; + } + } else if (expect_output_file) { + // This is the output file specified after -o + strcpy(wfilename, argv[i]); + output_specified = 1; + expect_output_file = 0; + } else if (strstr(argv[i], ".csv")) { + // This is an input CSV file (we now only allow one) + if (!input_specified) { + strcpy(rfilename, argv[i]); + input_specified = 1; + } else { + printf("Warning: Multiple input files specified. Using only the first one: %s\n", rfilename); + } + } else { + // Parse iteration count + i32 tmp = atoi(argv[i]); + if (tmp > 0) { + iterations = tmp; + } + } + } + + // Error checks + if (!input_specified) { + printf("Error! No input CSV provided. Use -e if you want to see how it looks like.\n"); + return 1; + } + + if (expect_output_file) { + printf("Error! Expected an output file after -o option.\n"); + return 1; + } + + if (iterations == 0 && sampling_method != SAMPLING_GRID && sampling_method != SAMPLING_SENS) { + printf("Error! You didn't provide an integer number of iterations.\n"); + return 1; + } + + // Generate default output filename if not specified + if (!output_specified) { + // Generate default output filename - handle both Windows and Unix paths + char *file_ptr = strrchr(rfilename, '\\'); + if (!file_ptr) { + file_ptr = strrchr(rfilename, '/'); + } + + strcpy(wfilename, "Sampled_"); + if (file_ptr) { + strcat(wfilename, file_ptr + 1); + } else { + strcat(wfilename, rfilename); + } + } + + // Check if input file exists + FILE *readfile = fopen(rfilename, "r"); + if (!readfile) { + printf("Error! Input file not found: %s\n", rfilename); + return 1; + } + + // Open output file + FILE *writefile = fopen(wfilename, "w"); + if (!writefile) { + printf("Error! Could not create output file: %s\n", wfilename); + fclose(readfile); + return 1; + } + + // Read the header line + char header_line[1024] = {0}; + if (!fgets(header_line, sizeof(header_line), readfile)) { + printf("Error reading header line!\n"); + fclose(readfile); + fclose(writefile); + return 1; + } + + // Write the header line to output + fputs(header_line, writefile); + + // Count parameters (commas) + i32 param_count = 0; + for (char *p = header_line; *p; p++) { + if (*p == ',') param_count++; + } + + printf("1. Total of %d parameters detected in %s. Output file is named %s.\n", param_count, rfilename, wfilename); + + // Initialize RNG with current time as seed + rng_init(0); + + // Allocate parameter data structure + ParameterData* params = allocate_parameter_data(param_count); + if (!params) { + printf("Error! Memory allocation failed for parameter data.\n"); + fclose(readfile); + fclose(writefile); + return 1; + } + + // Parse input file - using larger fixed size buffers to avoid potential overflows + char buffer[32768]; // 32KB buffer for reading lines + char *token, *err_ptr; + i32 k = 0; + + // PDFs + if (!fgets(buffer, sizeof(buffer), readfile)) { + printf("Error reading PDF line from input file!\n"); + free_parameter_data(params); + fclose(readfile); + fclose(writefile); + return 1; + } + + token = strtok(buffer, ","); + while ((token = strtok(NULL, ","))) { + params->pdf_types[k++] = *token; + } + + // Nominal + k = 0; + if (!fgets(buffer, sizeof(buffer), readfile)) { + printf("Error reading nominal values from input file!\n"); + free_parameter_data(params); + fclose(readfile); + fclose(writefile); + return 1; + } + + token = strtok(buffer, ","); + while ((token = strtok(NULL, ","))) { + params->nominal_values[k++] = strtof(token, &err_ptr); + } + + // Maxima + k = 0; + if (!fgets(buffer, sizeof(buffer), readfile)) { + printf("Error reading maximum values from input file!\n"); + free_parameter_data(params); + fclose(readfile); + fclose(writefile); + return 1; + } + + token = strtok(buffer, ","); + while ((token = strtok(NULL, ","))) { + params->max_values[k++] = strtof(token, &err_ptr); + } + + // Minima + k = 0; + if (!fgets(buffer, sizeof(buffer), readfile)) { + printf("Error reading minimum values from input file!\n"); + free_parameter_data(params); + fclose(readfile); + fclose(writefile); + return 1; + } + + token = strtok(buffer, ","); + while ((token = strtok(NULL, ","))) { + params->min_values[k++] = strtof(token, &err_ptr); + } + + // Means + k = 0; + if (!fgets(buffer, sizeof(buffer), readfile)) { + printf("Error reading mean values from input file!\n"); + free_parameter_data(params); + fclose(readfile); + fclose(writefile); + return 1; + } + + token = strtok(buffer, ","); + while ((token = strtok(NULL, ","))) { + params->mean_values[k++] = strtof(token, &err_ptr); + } + + // Standard Deviations + k = 0; + if (!fgets(buffer, sizeof(buffer), readfile)) { + printf("Error reading standard deviation values from input file!\n"); + free_parameter_data(params); + fclose(readfile); + fclose(writefile); + return 1; + } + + token = strtok(buffer, ","); + while ((token = strtok(NULL, ","))) { + params->stddev_values[k++] = strtof(token, &err_ptr); + } + + fclose(readfile); + printf("2. Input CSV successfully parsed.\n"); + + // Write initial reference row + fprintf(writefile, "1"); + for (i32 j = 0; j < param_count; j++) { + fprintf(writefile, ",%g", params->nominal_values[j]); + } + fprintf(writefile, "\n"); + + // Generate and write samples based on sampling method + switch (sampling_method) { + case SAMPLING_MCS: + // Monte Carlo sampling + for (i32 i = 0; i < iterations; i++) { + // Write row number + fprintf(writefile, "%d", i + 2); + + // Generate samples for each parameter + for (i32 j = 0; j < param_count; j++) { + f64 u = rng_uniform_f64(); + f64 sample = generate_sample(params->pdf_types[j], u, + params->min_values[j], params->max_values[j], + params->mean_values[j], params->stddev_values[j]); + + // If truncated normal and out of bounds, use uniform random between min and max + if (params->pdf_types[j] == PDF_TRUNCNORMAL && + (sample < params->min_values[j] || sample > params->max_values[j])) { + f64 u_uniform = rng_uniform_f64(); + sample = generate_sample('U', u_uniform, + params->min_values[j], params->max_values[j], + params->mean_values[j], params->stddev_values[j]); + } + + fprintf(writefile, ",%g", sample); + } + + fprintf(writefile, "\n"); + } + break; + + case SAMPLING_GRID: + // Handle grid sampling iterations for large parameter counts + if (param_count >= 30) { // 2^30 would be over a billion iterations + printf("Error: Grid sampling with %d parameters would result in too many iterations (2^%d).\n", + param_count, param_count); + free_parameter_data(params); + fclose(writefile); + return 1; + } else { + iterations = 1 << param_count; + if (param_count > 17) { + char response; + printf("You have %d parameters, which corresponds to %d iterations. Are you sure this is what you want? (y/n): ", + param_count, iterations); + scanf("%c", &response); + if (response != 'y') { + printf("Quitting.\n"); + free_parameter_data(params); + fclose(writefile); + return 1; + } + } + } + + // Grid sampling + for (i32 i = 0; i < iterations; i++) { + // Write row number + fprintf(writefile, "%d", i + 2); + + // Generate samples for each parameter + for (i32 j = 0; j < param_count; j++) { + f64 sample; + + // Safe bit manipulation even for large parameter counts + if ((i >> j) & 1) { + sample = params->max_values[j]; + } else { + sample = params->min_values[j]; + } + + fprintf(writefile, ",%g", sample); + } + + fprintf(writefile, "\n"); + } + break; + + case SAMPLING_SENS: + // Calculate number of iterations needed: 2 * number of parameters + iterations = 2 * param_count; + + // For each parameter + for (i32 j = 0; j < param_count; j++) { + // Parameter at minimum, others at nominal + fprintf(writefile, "%d", j*2 + 2); + for (i32 k = 0; k < param_count; k++) { + if (k == j) { + // This parameter at minimum + fprintf(writefile, ",%g", params->min_values[k]); + } else { + // Others at nominal + fprintf(writefile, ",%g", params->nominal_values[k]); + } + } + fprintf(writefile, "\n"); + + // Parameter at maximum, others at nominal + fprintf(writefile, "%d", j*2 + 3); + for (i32 k = 0; k < param_count; k++) { + if (k == j) { + // This parameter at maximum + fprintf(writefile, ",%g", params->max_values[k]); + } else { + // Others at nominal + fprintf(writefile, ",%g", params->nominal_values[k]); + } + } + fprintf(writefile, "\n"); + } + break; + + case SAMPLING_LHS: + // Allocate intervals for LHS (transposed for better cache locality) + i32 *intervals = (i32*)malloc(iterations * param_count * sizeof(i32)); + if (!intervals) { + printf("Error! Memory allocation failed for intervals.\n"); + free_parameter_data(params); + fclose(writefile); + return 1; + } + + // Generate LHS intervals with improved cache locality + generate_lhs_intervals(iterations, param_count, intervals); + + // Latin Hypercube sampling + for (i32 i = 0; i < iterations; i++) { + // Write row number + fprintf(writefile, "%d", i + 2); + + // Generate samples for each parameter + for (i32 j = 0; j < param_count; j++) { + // Get interval for this parameter (accessing in transposed form) + i32 interval = intervals[i * param_count + j]; + f64 interval_start = (f64)interval / iterations; + f64 interval_end = (f64)(interval + 1) / iterations; + f64 u = interval_start + rng_uniform_f64() * (interval_end - interval_start); + + f64 sample = generate_sample(params->pdf_types[j], u, + params->min_values[j], params->max_values[j], + params->mean_values[j], params->stddev_values[j]); + + // Handle truncated normal out-of-bounds samples with improved approach + while (params->pdf_types[j] == PDF_TRUNCNORMAL && + (sample < params->min_values[j] || sample > params->max_values[j])) { + // Try a new sample within the same interval for better statistical properties + i32 i_rej = rng_int32(iterations); + i32 j_rej = rng_int32(param_count); + i32 interval_rej = intervals[i_rej * param_count + j_rej]; + f64 interval_start_rej = (f64)interval_rej / iterations; + f64 interval_end_rej = (f64)(interval_rej + 1) / iterations; + f64 u_rej = interval_start_rej + rng_uniform_f64() * (interval_end_rej - interval_start_rej); + sample = generate_sample(params->pdf_types[j], u_rej, + params->min_values[j], params->max_values[j], + params->mean_values[j], params->stddev_values[j]); + } + + fprintf(writefile, ",%g", sample); + } + + fprintf(writefile, "\n"); + } + + // Free LHS intervals + free(intervals); + break; + } + + // Clean up + free_parameter_data(params); + + printf("3. %d rows and %d columns have been written to the file.\nProgram finished! Check the output!\n", + iterations + 1, param_count + 1); + fclose(writefile); + + return 0; +} -- cgit v1.2.3