From 8332d652eac5b46d8b8b02b25d47aa5355ef31cd Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 3 Aug 2025 18:25:31 +0200 Subject: added schott json --- scripts/excel_to_json.py | 241 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100755 scripts/excel_to_json.py (limited to 'scripts/excel_to_json.py') diff --git a/scripts/excel_to_json.py b/scripts/excel_to_json.py new file mode 100755 index 0000000..73e52e6 --- /dev/null +++ b/scripts/excel_to_json.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +""" +Convert all optical glass Excel catalogs to a single JSON format for GlaMaC. +Processes all Excel files in data/Excel/ directory and extracts nd and vd values. +""" + +import pandas as pd +import json +import sys +import os +from pathlib import Path + +def convert_schott_excel(excel_path): + """Convert SCHOTT Excel catalog to glasses list.""" + df = pd.read_excel(excel_path, skiprows=6) + glasses = [] + + for _, row in df.iterrows(): + if pd.isna(row.iloc[0]) or row.iloc[0] == '': + continue + + try: + glass = { + "name": str(row.iloc[0]).strip(), + "nd": float(row.iloc[1]), + "vd": float(row.iloc[3]), + "manufacturer": "SCHOTT" + } + glasses.append(glass) + except (ValueError, IndexError, TypeError): + continue + + return glasses + +def convert_hoya_excel(excel_path): + """Convert HOYA Excel catalog to glasses list.""" + df = pd.read_excel(excel_path, skiprows=3) + glasses = [] + + for _, row in df.iterrows(): + if pd.isna(row.iloc[2]): # Glass name in column 2 + continue + + try: + glass = { + "name": str(row.iloc[2]).strip(), + "nd": float(row.iloc[4]), + "vd": float(row.iloc[5]), + "manufacturer": "HOYA" + } + glasses.append(glass) + except (ValueError, IndexError, TypeError): + continue + + return glasses + +def convert_cdgm_excel(excel_path): + """Convert CDGM Excel catalog to glasses list.""" + df = pd.read_excel(excel_path, skiprows=1) + glasses = [] + + for _, row in df.iterrows(): + if pd.isna(row.iloc[0]) or row.iloc[0] == '': + continue + + try: + # Glass name in col 0, nd at col 13 (587.56 nm), vd at col 23 + glass = { + "name": str(row.iloc[0]).strip(), + "nd": float(row.iloc[13]), # n at 587.56 nm (d-line) + "vd": float(row.iloc[23]), # Abbe number + "manufacturer": "CDGM" + } + glasses.append(glass) + except (ValueError, IndexError, TypeError): + continue + + return glasses + +def convert_ohara_excel(excel_path): + """Convert Ohara Excel catalog to glasses list.""" + df = pd.read_excel(excel_path, skiprows=1) + glasses = [] + + for _, row in df.iterrows(): + if pd.isna(row.iloc[1]) or row.iloc[1] == '': # Glass name in col 1 + continue + + try: + glass = { + "name": str(row.iloc[1]).strip(), + "nd": float(row.iloc[16]), # nd column + "vd": float(row.iloc[24]), # vd column (first valid Abbe number) + "manufacturer": "Ohara" + } + glasses.append(glass) + except (ValueError, IndexError, TypeError): + continue + + return glasses + +def detect_manufacturer(excel_path): + """Detect manufacturer from filename.""" + filename = Path(excel_path).name.lower() + + if 'schott' in filename: + return 'schott' + elif 'hoya' in filename: + return 'hoya' + elif 'cdgm' in filename: + return 'cdgm' + elif 'ohara' in filename: + return 'ohara' + else: + return 'unknown' + +def process_excel_files(input_path, output_file="glasses.json"): + """Process Excel file(s) and output single JSON.""" + + input_path = Path(input_path) + if not input_path.exists(): + print(f"Error: Path {input_path} not found") + sys.exit(1) + + # Determine if input is file or directory + if input_path.is_file(): + if input_path.suffix.lower() not in ['.xlsx', '.xls']: + print(f"Error: {input_path} is not an Excel file") + sys.exit(1) + excel_files = [input_path] + print(f"Processing single Excel file: {input_path.name}") + elif input_path.is_dir(): + # Find all Excel files in directory + excel_files = list(input_path.glob("*.xlsx")) + list(input_path.glob("*.xls")) + if not excel_files: + print(f"No Excel files found in directory {input_path}") + sys.exit(1) + print(f"Processing directory: {input_path}") + print(f"Found {len(excel_files)} Excel file(s)") + else: + print(f"Error: {input_path} is neither a file nor a directory") + sys.exit(1) + + all_glasses = [] + total_count = 0 + + for excel_file in excel_files: + manufacturer = detect_manufacturer(excel_file) + print(f"Processing {manufacturer.upper()}: {excel_file.name}") + + try: + if manufacturer == 'schott': + glasses = convert_schott_excel(excel_file) + elif manufacturer == 'hoya': + glasses = convert_hoya_excel(excel_file) + elif manufacturer == 'cdgm': + glasses = convert_cdgm_excel(excel_file) + elif manufacturer == 'ohara': + glasses = convert_ohara_excel(excel_file) + else: + print(f" Warning: Unknown manufacturer for {excel_file.name}, skipping") + continue + + print(f" Extracted {len(glasses)} glasses") + all_glasses.extend(glasses) + total_count += len(glasses) + + except Exception as e: + print(f" Error processing {excel_file.name}: {e}") + continue + + # Group glasses by manufacturer + manufacturers_data = {} + for glass in all_glasses: + manufacturer = glass["manufacturer"] + if manufacturer not in manufacturers_data: + manufacturers_data[manufacturer] = { + "glass_count": 0, + "glasses": [] + } + + # Remove manufacturer field from individual glass since it's now the key + glass_data = {k: v for k, v in glass.items() if k != "manufacturer"} + manufacturers_data[manufacturer]["glasses"].append(glass_data) + manufacturers_data[manufacturer]["glass_count"] += 1 + + # Create output structure with nested manufacturers + output_data = { + "format_version": "1.0", + "generated_date": pd.Timestamp.now().isoformat(), + "total_glasses": total_count, + "manufacturers": manufacturers_data + } + + # Ensure output directory exists + output_path = Path(output_file) + output_path.parent.mkdir(exist_ok=True) + + # Write JSON + with open(output_path, 'w') as f: + json.dump(output_data, f, indent=2, ensure_ascii=False) + + print(f"\nSummary:") + print(f" Total glasses: {total_count}") + print(f" Manufacturers: {', '.join(manufacturers_data.keys())}") + for mfg, data in manufacturers_data.items(): + print(f" {mfg}: {data['glass_count']} glasses") + print(f" Output file: {output_path}") + +def main(): + """Main function with argument parsing.""" + import argparse + + parser = argparse.ArgumentParser( + description='Convert optical glass Excel catalogs to JSON format', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Process all Excel files in a directory + python excel_to_json.py data/Excel/ + + # Process a single Excel file + python excel_to_json.py data/Excel/HOYA20210421.xlsx + + # Specify output file + python excel_to_json.py data/Excel/ -o my_glasses.json + """ + ) + + parser.add_argument('input', + help='Input Excel file or directory containing Excel files') + parser.add_argument('-o', '--output', + default='glasses.json', + help='Output JSON file (default: glasses.json in current directory)') + + args = parser.parse_args() + + process_excel_files(args.input, args.output) + +if __name__ == "__main__": + main() \ No newline at end of file -- cgit v1.2.3