1 files changed, 241 insertions, 0 deletions
diff --git a/scripts/excel_to_json.py b/scripts/excel_to_json.py
new file mode 100755
index 0000000..73e52e6
--- /dev/null
+++ b/scripts/excel_to_json.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+"""
+Convert all optical glass Excel catalogs to a single JSON format for GlaMaC.
+Processes all Excel files in data/Excel/ directory and extracts nd and vd values.
+"""
+
+import pandas as pd
+import json
+import sys
+import os
+from pathlib import Path
+
+def convert_schott_excel(excel_path):
+    """Convert SCHOTT Excel catalog to glasses list."""
+    df = pd.read_excel(excel_path, skiprows=6)
+    glasses = []
+    
+    for _, row in df.iterrows():
+        if pd.isna(row.iloc[0]) or row.iloc[0] == '':
+            continue
+            
+        try:
+            glass = {
+                "name": str(row.iloc[0]).strip(),
+                "nd": float(row.iloc[1]),
+                "vd": float(row.iloc[3]),
+                "manufacturer": "SCHOTT"
+            }
+            glasses.append(glass)
+        except (ValueError, IndexError, TypeError):
+            continue
+    
+    return glasses
+
+def convert_hoya_excel(excel_path):
+    """Convert HOYA Excel catalog to glasses list."""
+    df = pd.read_excel(excel_path, skiprows=3)
+    glasses = []
+    
+    for _, row in df.iterrows():
+        if pd.isna(row.iloc[2]):  # Glass name in column 2
+            continue
+            
+        try:
+            glass = {
+                "name": str(row.iloc[2]).strip(),
+                "nd": float(row.iloc[4]),
+                "vd": float(row.iloc[5]),
+                "manufacturer": "HOYA"
+            }
+            glasses.append(glass)
+        except (ValueError, IndexError, TypeError):
+            continue
+    
+    return glasses
+
+def convert_cdgm_excel(excel_path):
+    """Convert CDGM Excel catalog to glasses list."""
+    df = pd.read_excel(excel_path, skiprows=1)
+    glasses = []
+    
+    for _, row in df.iterrows():
+        if pd.isna(row.iloc[0]) or row.iloc[0] == '':
+            continue
+            
+        try:
+            # Glass name in col 0, nd at col 13 (587.56 nm), vd at col 23
+            glass = {
+                "name": str(row.iloc[0]).strip(),
+                "nd": float(row.iloc[13]),  # n at 587.56 nm (d-line)
+                "vd": float(row.iloc[23]),  # Abbe number
+                "manufacturer": "CDGM"
+            }
+            glasses.append(glass)
+        except (ValueError, IndexError, TypeError):
+            continue
+    
+    return glasses
+
+def convert_ohara_excel(excel_path):
+    """Convert Ohara Excel catalog to glasses list."""
+    df = pd.read_excel(excel_path, skiprows=1)
+    glasses = []
+    
+    for _, row in df.iterrows():
+        if pd.isna(row.iloc[1]) or row.iloc[1] == '':  # Glass name in col 1
+            continue
+            
+        try:
+            glass = {
+                "name": str(row.iloc[1]).strip(),
+                "nd": float(row.iloc[16]),  # nd column
+                "vd": float(row.iloc[24]),  # vd column (first valid Abbe number)
+                "manufacturer": "Ohara"
+            }
+            glasses.append(glass)
+        except (ValueError, IndexError, TypeError):
+            continue
+    
+    return glasses
+
+def detect_manufacturer(excel_path):
+    """Detect manufacturer from filename."""
+    filename = Path(excel_path).name.lower()
+    
+    if 'schott' in filename:
+        return 'schott'
+    elif 'hoya' in filename:
+        return 'hoya'
+    elif 'cdgm' in filename:
+        return 'cdgm'
+    elif 'ohara' in filename:
+        return 'ohara'
+    else:
+        return 'unknown'
+
+def process_excel_files(input_path, output_file="glasses.json"):
+    """Process Excel file(s) and output single JSON."""
+    
+    input_path = Path(input_path)
+    if not input_path.exists():
+        print(f"Error: Path {input_path} not found")
+        sys.exit(1)
+    
+    # Determine if input is file or directory
+    if input_path.is_file():
+        if input_path.suffix.lower() not in ['.xlsx', '.xls']:
+            print(f"Error: {input_path} is not an Excel file")
+            sys.exit(1)
+        excel_files = [input_path]
+        print(f"Processing single Excel file: {input_path.name}")
+    elif input_path.is_dir():
+        # Find all Excel files in directory
+        excel_files = list(input_path.glob("*.xlsx")) + list(input_path.glob("*.xls"))
+        if not excel_files:
+            print(f"No Excel files found in directory {input_path}")
+            sys.exit(1)
+        print(f"Processing directory: {input_path}")
+        print(f"Found {len(excel_files)} Excel file(s)")
+    else:
+        print(f"Error: {input_path} is neither a file nor a directory")
+        sys.exit(1)
+    
+    all_glasses = []
+    total_count = 0
+    
+    for excel_file in excel_files:
+        manufacturer = detect_manufacturer(excel_file)
+        print(f"Processing {manufacturer.upper()}: {excel_file.name}")
+        
+        try:
+            if manufacturer == 'schott':
+                glasses = convert_schott_excel(excel_file)
+            elif manufacturer == 'hoya':
+                glasses = convert_hoya_excel(excel_file)
+            elif manufacturer == 'cdgm':
+                glasses = convert_cdgm_excel(excel_file)
+            elif manufacturer == 'ohara':
+                glasses = convert_ohara_excel(excel_file)
+            else:
+                print(f"  Warning: Unknown manufacturer for {excel_file.name}, skipping")
+                continue
+            
+            print(f"  Extracted {len(glasses)} glasses")
+            all_glasses.extend(glasses)
+            total_count += len(glasses)
+            
+        except Exception as e:
+            print(f"  Error processing {excel_file.name}: {e}")
+            continue
+    
+    # Group glasses by manufacturer
+    manufacturers_data = {}
+    for glass in all_glasses:
+        manufacturer = glass["manufacturer"]
+        if manufacturer not in manufacturers_data:
+            manufacturers_data[manufacturer] = {
+                "glass_count": 0,
+                "glasses": []
+            }
+        
+        # Remove manufacturer field from individual glass since it's now the key
+        glass_data = {k: v for k, v in glass.items() if k != "manufacturer"}
+        manufacturers_data[manufacturer]["glasses"].append(glass_data)
+        manufacturers_data[manufacturer]["glass_count"] += 1
+    
+    # Create output structure with nested manufacturers
+    output_data = {
+        "format_version": "1.0",
+        "generated_date": pd.Timestamp.now().isoformat(),
+        "total_glasses": total_count,
+        "manufacturers": manufacturers_data
+    }
+    
+    # Ensure output directory exists
+    output_path = Path(output_file)
+    output_path.parent.mkdir(exist_ok=True)
+    
+    # Write JSON
+    with open(output_path, 'w') as f:
+        json.dump(output_data, f, indent=2, ensure_ascii=False)
+    
+    print(f"\nSummary:")
+    print(f"  Total glasses: {total_count}")
+    print(f"  Manufacturers: {', '.join(manufacturers_data.keys())}")
+    for mfg, data in manufacturers_data.items():
+        print(f"    {mfg}: {data['glass_count']} glasses")
+    print(f"  Output file: {output_path}")
+
+def main():
+    """Main function with argument parsing."""
+    import argparse
+    
+    parser = argparse.ArgumentParser(
+        description='Convert optical glass Excel catalogs to JSON format',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Process all Excel files in a directory
+  python excel_to_json.py data/Excel/
+  
+  # Process a single Excel file
+  python excel_to_json.py data/Excel/HOYA20210421.xlsx
+  
+  # Specify output file
+  python excel_to_json.py data/Excel/ -o my_glasses.json
+        """
+    )
+    
+    parser.add_argument('input', 
+                       help='Input Excel file or directory containing Excel files')
+    parser.add_argument('-o', '--output', 
+                       default='glasses.json',
+                       help='Output JSON file (default: glasses.json in current directory)')
+    
+    args = parser.parse_args()
+    
+    process_excel_files(args.input, args.output)
+
+if __name__ == "__main__":
+    main()
+\ No newline at end of file