#!/usr/bin/env python3 """ Convert all optical glass Excel catalogs to a single JSON format for GlaMaC. Processes all Excel files in data/Excel/ directory and extracts nd and vd values. """ import pandas as pd import json import sys import os from pathlib import Path def convert_schott_excel(excel_path): """Convert SCHOTT Excel catalog to glasses list.""" df = pd.read_excel(excel_path, skiprows=6) glasses = [] for _, row in df.iterrows(): if pd.isna(row.iloc[0]) or row.iloc[0] == '': continue try: glass = { "name": str(row.iloc[0]).strip(), "nd": float(row.iloc[1]), "vd": float(row.iloc[3]), "manufacturer": "SCHOTT" } glasses.append(glass) except (ValueError, IndexError, TypeError): continue return glasses def convert_hoya_excel(excel_path): """Convert HOYA Excel catalog to glasses list.""" df = pd.read_excel(excel_path, skiprows=3) glasses = [] for _, row in df.iterrows(): if pd.isna(row.iloc[2]): # Glass name in column 2 continue try: glass = { "name": str(row.iloc[2]).strip(), "nd": float(row.iloc[4]), "vd": float(row.iloc[5]), "manufacturer": "HOYA" } glasses.append(glass) except (ValueError, IndexError, TypeError): continue return glasses def convert_cdgm_excel(excel_path): """Convert CDGM Excel catalog to glasses list.""" df = pd.read_excel(excel_path, skiprows=1) glasses = [] for _, row in df.iterrows(): if pd.isna(row.iloc[0]) or row.iloc[0] == '': continue try: # Glass name in col 0, nd at col 13 (587.56 nm), vd at col 23 glass = { "name": str(row.iloc[0]).strip(), "nd": float(row.iloc[13]), # n at 587.56 nm (d-line) "vd": float(row.iloc[23]), # Abbe number "manufacturer": "CDGM" } glasses.append(glass) except (ValueError, IndexError, TypeError): continue return glasses def convert_ohara_excel(excel_path): """Convert Ohara Excel catalog to glasses list.""" df = pd.read_excel(excel_path, skiprows=1) glasses = [] for _, row in df.iterrows(): if pd.isna(row.iloc[1]) or row.iloc[1] == '': # Glass name in col 1 continue try: glass = { "name": str(row.iloc[1]).strip(), "nd": float(row.iloc[16]), # nd column "vd": float(row.iloc[24]), # vd column (first valid Abbe number) "manufacturer": "Ohara" } glasses.append(glass) except (ValueError, IndexError, TypeError): continue return glasses def detect_manufacturer(excel_path): """Detect manufacturer from filename.""" filename = Path(excel_path).name.lower() if 'schott' in filename: return 'schott' elif 'hoya' in filename: return 'hoya' elif 'cdgm' in filename: return 'cdgm' elif 'ohara' in filename: return 'ohara' else: return 'unknown' def process_excel_files(input_path, output_file="glasses.json"): """Process Excel file(s) and output single JSON.""" input_path = Path(input_path) if not input_path.exists(): print(f"Error: Path {input_path} not found") sys.exit(1) # Determine if input is file or directory if input_path.is_file(): if input_path.suffix.lower() not in ['.xlsx', '.xls']: print(f"Error: {input_path} is not an Excel file") sys.exit(1) excel_files = [input_path] print(f"Processing single Excel file: {input_path.name}") elif input_path.is_dir(): # Find all Excel files in directory excel_files = list(input_path.glob("*.xlsx")) + list(input_path.glob("*.xls")) if not excel_files: print(f"No Excel files found in directory {input_path}") sys.exit(1) print(f"Processing directory: {input_path}") print(f"Found {len(excel_files)} Excel file(s)") else: print(f"Error: {input_path} is neither a file nor a directory") sys.exit(1) all_glasses = [] total_count = 0 for excel_file in excel_files: manufacturer = detect_manufacturer(excel_file) print(f"Processing {manufacturer.upper()}: {excel_file.name}") try: if manufacturer == 'schott': glasses = convert_schott_excel(excel_file) elif manufacturer == 'hoya': glasses = convert_hoya_excel(excel_file) elif manufacturer == 'cdgm': glasses = convert_cdgm_excel(excel_file) elif manufacturer == 'ohara': glasses = convert_ohara_excel(excel_file) else: print(f" Warning: Unknown manufacturer for {excel_file.name}, skipping") continue print(f" Extracted {len(glasses)} glasses") all_glasses.extend(glasses) total_count += len(glasses) except Exception as e: print(f" Error processing {excel_file.name}: {e}") continue # Group glasses by manufacturer manufacturers_data = {} for glass in all_glasses: manufacturer = glass["manufacturer"] if manufacturer not in manufacturers_data: manufacturers_data[manufacturer] = { "glass_count": 0, "glasses": [] } # Remove manufacturer field from individual glass since it's now the key glass_data = {k: v for k, v in glass.items() if k != "manufacturer"} manufacturers_data[manufacturer]["glasses"].append(glass_data) manufacturers_data[manufacturer]["glass_count"] += 1 # Create output structure with nested manufacturers output_data = { "format_version": "1.0", "generated_date": pd.Timestamp.now().isoformat(), "total_glasses": total_count, "manufacturers": manufacturers_data } # Ensure output directory exists output_path = Path(output_file) output_path.parent.mkdir(exist_ok=True) # Write JSON with open(output_path, 'w') as f: json.dump(output_data, f, indent=2, ensure_ascii=False) print(f"\nSummary:") print(f" Total glasses: {total_count}") print(f" Manufacturers: {', '.join(manufacturers_data.keys())}") for mfg, data in manufacturers_data.items(): print(f" {mfg}: {data['glass_count']} glasses") print(f" Output file: {output_path}") def main(): """Main function with argument parsing.""" import argparse parser = argparse.ArgumentParser( description='Convert optical glass Excel catalogs to JSON format', formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Process all Excel files in a directory python excel_to_json.py data/Excel/ # Process a single Excel file python excel_to_json.py data/Excel/HOYA20210421.xlsx # Specify output file python excel_to_json.py data/Excel/ -o my_glasses.json """ ) parser.add_argument('input', help='Input Excel file or directory containing Excel files') parser.add_argument('-o', '--output', default='glasses.json', help='Output JSON file (default: glasses.json in current directory)') args = parser.parse_args() process_excel_files(args.input, args.output) if __name__ == "__main__": main()