#!/usr/bin/env python3 """ Convert all optical glass Excel catalogs to a single JSON format for GlaMaC. Processes all Excel files in data/Excel/ directory and extracts nd and vd values. """ import pandas as pd import json import sys import os from pathlib import Path def convert_schott_excel(excel_path): """Convert SCHOTT Excel catalog to glasses list with comprehensive data extraction.""" glasses = [] # Process both preferred and inquiry sheets sheet_info = [ ("Preferred glasses", "preferred"), ("Inquiry glasses", "inquiry") ] for sheet_name, availability in sheet_info: try: print(f" Processing sheet: {sheet_name}") df = pd.read_excel(excel_path, sheet_name=sheet_name, skiprows=3) for _, row in df.iterrows(): # Skip empty rows if pd.isna(row.iloc[0]) or str(row.iloc[0]).strip() == '': continue try: glass_name = str(row.iloc[0]).strip() # Basic glass properties glass = { "name": glass_name, "nd": safe_float(row.iloc[1]), # nd "vd": safe_float(row.iloc[3]), # vd "manufacturer": "SCHOTT", "availability": availability } # Add glass code if available (column 158) if len(row) > 158 and not pd.isna(row.iloc[158]): glass_code_raw = str(row.iloc[158]).strip() # SCHOTT uses format like "517642.251" where first part is the 6-digit code if '.' in glass_code_raw: integer_part = glass_code_raw.split('.')[0] glass_code_digits = ''.join(c for c in integer_part if c.isdigit()) if len(glass_code_digits) > 0: glass["glass_code"] = glass_code_digits.zfill(6) else: # Fallback for codes without decimal point glass_code_digits = ''.join(c for c in glass_code_raw if c.isdigit()) if len(glass_code_digits) >= 6: glass["glass_code"] = glass_code_digits[:6] elif len(glass_code_digits) > 0: glass["glass_code"] = glass_code_digits.zfill(6) # Additional refractive indices refractive_indices = {} if not pd.isna(row.iloc[2]): refractive_indices["ne"] = safe_float(row.iloc[2]) if not pd.isna(row.iloc[4]): refractive_indices["ve"] = safe_float(row.iloc[4]) # Add spectral refractive indices (columns 116-138) spectral_indices = { "n2325.4": safe_float(row.iloc[116]) if len(row) > 116 else None, "n1970.1": safe_float(row.iloc[117]) if len(row) > 117 else None, "n1529.6": safe_float(row.iloc[118]) if len(row) > 118 else None, "n1060.0": safe_float(row.iloc[119]) if len(row) > 119 else None, "nt": safe_float(row.iloc[120]) if len(row) > 120 else None, "ns": safe_float(row.iloc[121]) if len(row) > 121 else None, "nr": safe_float(row.iloc[122]) if len(row) > 122 else None, "nC": safe_float(row.iloc[123]) if len(row) > 123 else None, "nC_prime": safe_float(row.iloc[124]) if len(row) > 124 else None, "n632.8": safe_float(row.iloc[125]) if len(row) > 125 else None, "nD": safe_float(row.iloc[126]) if len(row) > 126 else None, "nd_spectral": safe_float(row.iloc[127]) if len(row) > 127 else None, "ne_spectral": safe_float(row.iloc[128]) if len(row) > 128 else None, "nF": safe_float(row.iloc[129]) if len(row) > 129 else None, "nF_prime": safe_float(row.iloc[130]) if len(row) > 130 else None, "ng": safe_float(row.iloc[131]) if len(row) > 131 else None, "nh": safe_float(row.iloc[132]) if len(row) > 132 else None, "ni": safe_float(row.iloc[133]) if len(row) > 133 else None, "n334.1": safe_float(row.iloc[134]) if len(row) > 134 else None, "n312.6": safe_float(row.iloc[135]) if len(row) > 135 else None, "n296.7": safe_float(row.iloc[136]) if len(row) > 136 else None, "n280.4": safe_float(row.iloc[137]) if len(row) > 137 else None, "n248.3": safe_float(row.iloc[138]) if len(row) > 138 else None } # Filter out None values spectral_indices = {k: v for k, v in spectral_indices.items() if v is not None} if spectral_indices: refractive_indices.update(spectral_indices) if refractive_indices: glass["refractive_indices"] = refractive_indices # Color code if not pd.isna(row.iloc[5]): glass["colour_code"] = str(row.iloc[5]) # Dispersion coefficients (Schott formula) dispersion = {} if not pd.isna(row.iloc[6]): dispersion["B1"] = safe_float(row.iloc[6]) if not pd.isna(row.iloc[7]): dispersion["B2"] = safe_float(row.iloc[7]) if not pd.isna(row.iloc[8]): dispersion["B3"] = safe_float(row.iloc[8]) if not pd.isna(row.iloc[9]): dispersion["C1"] = safe_float(row.iloc[9]) if not pd.isna(row.iloc[10]): dispersion["C2"] = safe_float(row.iloc[10]) if not pd.isna(row.iloc[11]): dispersion["C3"] = safe_float(row.iloc[11]) if dispersion: dispersion["dispersion_formula"] = "Schott" glass["dispersion"] = dispersion # Temperature coefficients temperature_coeffs = {} if not pd.isna(row.iloc[12]): temperature_coeffs["D0"] = safe_float(row.iloc[12]) if not pd.isna(row.iloc[13]): temperature_coeffs["D1"] = safe_float(row.iloc[13]) if not pd.isna(row.iloc[14]): temperature_coeffs["D2"] = safe_float(row.iloc[14]) if not pd.isna(row.iloc[15]): temperature_coeffs["E0"] = safe_float(row.iloc[15]) if not pd.isna(row.iloc[16]): temperature_coeffs["E1"] = safe_float(row.iloc[16]) if not pd.isna(row.iloc[17]): temperature_coeffs["lambda"] = safe_float(row.iloc[17]) if temperature_coeffs: glass["temperature_coefficients"] = temperature_coeffs # Transmission data (TAUI25 and TAUI10 series) transmission = {} # TAUI25 series (columns 37-66) taui25 = {} wavelengths_25 = ["2500", "2325", "1970", "1530", "1060", "700", "660", "620", "580", "546", "500", "460", "436", "420", "405", "400", "390", "380", "370", "365", "350", "334", "320", "310", "300", "290", "280", "270", "260", "250"] for i, wl in enumerate(wavelengths_25): if len(row) > 37 + i: val = safe_float(row.iloc[37 + i]) if val is not None: taui25[f"{wl}nm"] = val # TAUI10 series (columns 67-96) taui10 = {} for i, wl in enumerate(wavelengths_25): if len(row) > 67 + i: val = safe_float(row.iloc[67 + i]) if val is not None: taui10[f"{wl}nm"] = val if taui25: transmission["TAUI25"] = taui25 if taui10: transmission["TAUI10"] = taui10 if transmission: glass["transmission"] = transmission # Chemical resistance (columns 98-102) chemical_resistance = {} if len(row) > 98 and not pd.isna(row.iloc[98]): chemical_resistance["CR"] = safe_float(row.iloc[98]) if len(row) > 99 and not pd.isna(row.iloc[99]): chemical_resistance["FR"] = safe_float(row.iloc[99]) if len(row) > 100 and not pd.isna(row.iloc[100]): chemical_resistance["SR"] = safe_float(row.iloc[100]) if len(row) > 101 and not pd.isna(row.iloc[101]): chemical_resistance["AR"] = safe_float(row.iloc[101]) if len(row) > 102 and not pd.isna(row.iloc[102]): chemical_resistance["PR"] = safe_float(row.iloc[102]) if chemical_resistance: glass["chemical_resistance"] = chemical_resistance # Physical properties physical_properties = {} if len(row) > 103 and not pd.isna(row.iloc[103]): physical_properties["density"] = safe_float(row.iloc[103]) if len(row) > 104 and not pd.isna(row.iloc[104]): physical_properties["Tg"] = safe_float(row.iloc[104]) if len(row) > 105 and not pd.isna(row.iloc[105]): physical_properties["T13"] = safe_float(row.iloc[105]) if len(row) > 106 and not pd.isna(row.iloc[106]): physical_properties["T7_6"] = safe_float(row.iloc[106]) if len(row) > 107 and not pd.isna(row.iloc[107]): physical_properties["heat_capacity"] = safe_float(row.iloc[107]) if len(row) > 108 and not pd.isna(row.iloc[108]): physical_properties["heat_conductivity"] = safe_float(row.iloc[108]) if len(row) > 109 and not pd.isna(row.iloc[109]): physical_properties["alpha_minus30_70"] = safe_float(row.iloc[109]) if len(row) > 110 and not pd.isna(row.iloc[110]): physical_properties["alpha_20_300"] = safe_float(row.iloc[110]) if physical_properties: glass["physical_properties"] = physical_properties # Mechanical properties mechanical_properties = {} if len(row) > 111 and not pd.isna(row.iloc[111]): mechanical_properties["youngs_modulus"] = safe_float(row.iloc[111]) if len(row) > 112 and not pd.isna(row.iloc[112]): mechanical_properties["poisson_ratio"] = safe_float(row.iloc[112]) if len(row) > 113 and not pd.isna(row.iloc[113]): mechanical_properties["knoop_hardness"] = safe_float(row.iloc[113]) if len(row) > 114 and not pd.isna(row.iloc[114]): mechanical_properties["abrasion_hardness"] = safe_float(row.iloc[114]) if len(row) > 36 and not pd.isna(row.iloc[36]): mechanical_properties["stress_optical_coefficient"] = safe_float(row.iloc[36]) if mechanical_properties: glass["mechanical_properties"] = mechanical_properties # Partial dispersions and ratios (columns 139-157) partial_dispersions = {} pd_names = ["nF_minus_nC", "nF_prime_minus_nC_prime", "P_s_t", "P_C_s", "P_d_C", "P_e_d", "P_g_F", "P_i_h", "P_prime_s_t", "P_prime_C_prime_s", "P_prime_d_C_prime", "P_prime_e_d", "P_prime_g_F_prime", "P_prime_i_h", "DP_C_t", "DP_C_s", "DP_F_e", "DP_g_F", "DP_i_g"] for i, pd_name in enumerate(pd_names): if len(row) > 139 + i: val = safe_float(row.iloc[139 + i]) if val is not None: partial_dispersions[pd_name] = val if partial_dispersions: glass["partial_dispersions"] = partial_dispersions # Additional properties additional_properties = {} if len(row) > 159 and not pd.isna(row.iloc[159]): additional_properties["remarks"] = str(row.iloc[159]) if len(row) > 160 and not pd.isna(row.iloc[160]): additional_properties["date"] = str(row.iloc[160]) if len(row) > 161 and not pd.isna(row.iloc[161]): additional_properties["relative_price"] = safe_float(row.iloc[161]) # Quality ratings if len(row) > 162 and not pd.isna(row.iloc[162]): additional_properties["WR_J"] = safe_float(row.iloc[162]) if len(row) > 163 and not pd.isna(row.iloc[163]): additional_properties["SR_J"] = safe_float(row.iloc[163]) if len(row) > 164 and not pd.isna(row.iloc[164]): additional_properties["AT"] = safe_float(row.iloc[164]) if len(row) > 165 and not pd.isna(row.iloc[165]): additional_properties["abrasion_HG_J"] = safe_float(row.iloc[165]) if additional_properties: glass["additional_properties"] = additional_properties # Only add glasses with valid nd and vd if glass["nd"] is not None and glass["vd"] is not None: glasses.append(glass) except (ValueError, IndexError, TypeError) as e: print(f" Warning: Skipping glass '{glass_name}': {e}") continue except Exception as e: print(f" Error processing sheet {sheet_name}: {e}") continue return glasses def safe_float(value): """Safely convert value to float, return None if not possible.""" if pd.isna(value) or value == '' or value == ' ': return None try: return float(value) except (ValueError, TypeError): return None def extract_glass_code(code_value, manufacturer=""): """Extract and format glass code to 6-digit string with manufacturer-specific handling.""" if pd.isna(code_value) or code_value == '' or code_value == ' ': return None try: code_str = str(code_value).strip() # Handle different manufacturer formats: if manufacturer.upper() == "CDGM": # CDGM uses format like "51270.0" which should become "051270" # Split on decimal point and take integer part, then pad to 6 digits if '.' in code_str: integer_part = code_str.split('.')[0] digits_only = ''.join(c for c in integer_part if c.isdigit()) if len(digits_only) > 0: return digits_only.zfill(6) else: digits_only = ''.join(c for c in code_str if c.isdigit()) if len(digits_only) > 0: return digits_only.zfill(6) else: # HOYA: "554-718" -> "554718" # Ohara: "528765" -> "528765" # SCHOTT: handled separately # Remove dashes and decimal points, keep only digits digits_only = ''.join(c for c in code_str if c.isdigit()) if len(digits_only) >= 6: return digits_only[:6] # Take first 6 digits elif len(digits_only) > 0: return digits_only.zfill(6) # Pad with leading zeros return None except (ValueError, TypeError): return None def convert_hoya_excel(excel_path): """Convert HOYA Excel catalog to glasses list.""" df = pd.read_excel(excel_path, skiprows=3) glasses = [] for _, row in df.iterrows(): if pd.isna(row.iloc[2]): # Glass name in column 2 continue try: glass = { "name": str(row.iloc[2]).strip(), "nd": float(row.iloc[4]), "vd": float(row.iloc[5]), "manufacturer": "HOYA" } # Add glass code (column 3: Code) glass_code = extract_glass_code(row.iloc[3], "HOYA") if glass_code: glass["glass_code"] = glass_code # Add dispersion formula for HOYA dispersion = {"dispersion_formula": "HOYA"} glass["dispersion"] = dispersion glasses.append(glass) except (ValueError, IndexError, TypeError): continue return glasses def convert_cdgm_excel(excel_path): """Convert CDGM Excel catalog to glasses list.""" df = pd.read_excel(excel_path, skiprows=1) glasses = [] for _, row in df.iterrows(): if pd.isna(row.iloc[0]) or row.iloc[0] == '': continue try: # Glass name in col 0, nd at col 13 (587.56 nm), vd at col 23 glass = { "name": str(row.iloc[0]).strip(), "nd": float(row.iloc[13]), # n at 587.56 nm (d-line) "vd": float(row.iloc[23]), # Abbe number "manufacturer": "CDGM" } # Add glass code (column 1: Code) glass_code = extract_glass_code(row.iloc[1], "CDGM") if glass_code: glass["glass_code"] = glass_code # Add dispersion formula for CDGM dispersion = {"dispersion_formula": "CDGM"} glass["dispersion"] = dispersion glasses.append(glass) except (ValueError, IndexError, TypeError): continue return glasses def convert_ohara_excel(excel_path): """Convert Ohara Excel catalog to glasses list.""" df = pd.read_excel(excel_path, skiprows=1) glasses = [] for _, row in df.iterrows(): if pd.isna(row.iloc[1]) or row.iloc[1] == '': # Glass name in col 1 continue try: glass = { "name": str(row.iloc[1]).strip(), "nd": float(row.iloc[16]), # nd column "vd": float(row.iloc[24]), # vd column (first valid Abbe number) "manufacturer": "Ohara" } # Add glass code (column 2: Code(d)) glass_code = extract_glass_code(row.iloc[2], "Ohara") if glass_code: glass["glass_code"] = glass_code # Add dispersion formula for Ohara dispersion = {"dispersion_formula": "Ohara"} glass["dispersion"] = dispersion glasses.append(glass) except (ValueError, IndexError, TypeError): continue return glasses def detect_manufacturer(excel_path): """Detect manufacturer from filename.""" filename = Path(excel_path).name.lower() if 'schott' in filename: return 'schott' elif 'hoya' in filename: return 'hoya' elif 'cdgm' in filename: return 'cdgm' elif 'ohara' in filename: return 'ohara' else: return 'unknown' def process_excel_files(input_path, output_file="glasses.json"): """Process Excel file(s) and output single JSON.""" input_path = Path(input_path) if not input_path.exists(): print(f"Error: Path {input_path} not found") sys.exit(1) # Determine if input is file or directory if input_path.is_file(): if input_path.suffix.lower() not in ['.xlsx', '.xls']: print(f"Error: {input_path} is not an Excel file") sys.exit(1) excel_files = [input_path] print(f"Processing single Excel file: {input_path.name}") elif input_path.is_dir(): # Find all Excel files in directory excel_files = list(input_path.glob("*.xlsx")) + list(input_path.glob("*.xls")) if not excel_files: print(f"No Excel files found in directory {input_path}") sys.exit(1) print(f"Processing directory: {input_path}") print(f"Found {len(excel_files)} Excel file(s)") else: print(f"Error: {input_path} is neither a file nor a directory") sys.exit(1) all_glasses = [] total_count = 0 for excel_file in excel_files: manufacturer = detect_manufacturer(excel_file) print(f"Processing {manufacturer.upper()}: {excel_file.name}") try: if manufacturer == 'schott': glasses = convert_schott_excel(excel_file) elif manufacturer == 'hoya': glasses = convert_hoya_excel(excel_file) elif manufacturer == 'cdgm': glasses = convert_cdgm_excel(excel_file) elif manufacturer == 'ohara': glasses = convert_ohara_excel(excel_file) else: print(f" Warning: Unknown manufacturer for {excel_file.name}, skipping") continue print(f" Extracted {len(glasses)} glasses") all_glasses.extend(glasses) total_count += len(glasses) except Exception as e: print(f" Error processing {excel_file.name}: {e}") continue # Group glasses by manufacturer manufacturers_data = {} for glass in all_glasses: manufacturer = glass["manufacturer"] if manufacturer not in manufacturers_data: manufacturers_data[manufacturer] = { "glass_count": 0, "glasses": [] } # Remove manufacturer field from individual glass since it's now the key glass_data = {k: v for k, v in glass.items() if k != "manufacturer"} manufacturers_data[manufacturer]["glasses"].append(glass_data) manufacturers_data[manufacturer]["glass_count"] += 1 # Create output structure with nested manufacturers output_data = { "format_version": "1.0", "generated_date": pd.Timestamp.now().isoformat(), "total_glasses": total_count, "manufacturers": manufacturers_data } # Ensure output directory exists output_path = Path(output_file) output_path.parent.mkdir(exist_ok=True) # Write JSON with open(output_path, 'w') as f: json.dump(output_data, f, indent=2, ensure_ascii=False) print(f"\nSummary:") print(f" Total glasses: {total_count}") print(f" Manufacturers: {', '.join(manufacturers_data.keys())}") for mfg, data in manufacturers_data.items(): print(f" {mfg}: {data['glass_count']} glasses") print(f" Output file: {output_path}") def main(): """Main function with argument parsing.""" import argparse parser = argparse.ArgumentParser( description='Convert optical glass Excel catalogs to JSON format', formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Process all Excel files in a directory python excel_to_json.py data/Excel/ # Process a single Excel file python excel_to_json.py data/Excel/HOYA20210421.xlsx # Specify output file python excel_to_json.py data/Excel/ -o my_glasses.json """ ) parser.add_argument('input', help='Input Excel file or directory containing Excel files') parser.add_argument('-o', '--output', default='glasses.json', help='Output JSON file (default: glasses.json in current directory)') args = parser.parse_args() process_excel_files(args.input, args.output) if __name__ == "__main__": main()