summaryrefslogtreecommitdiff
path: root/scripts/excel_to_json.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/excel_to_json.py')
-rwxr-xr-xscripts/excel_to_json.py241
1 files changed, 241 insertions, 0 deletions
diff --git a/scripts/excel_to_json.py b/scripts/excel_to_json.py
new file mode 100755
index 0000000..73e52e6
--- /dev/null
+++ b/scripts/excel_to_json.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+"""
+Convert all optical glass Excel catalogs to a single JSON format for GlaMaC.
+Processes all Excel files in data/Excel/ directory and extracts nd and vd values.
+"""
+
+import pandas as pd
+import json
+import sys
+import os
+from pathlib import Path
+
+def convert_schott_excel(excel_path):
+ """Convert SCHOTT Excel catalog to glasses list."""
+ df = pd.read_excel(excel_path, skiprows=6)
+ glasses = []
+
+ for _, row in df.iterrows():
+ if pd.isna(row.iloc[0]) or row.iloc[0] == '':
+ continue
+
+ try:
+ glass = {
+ "name": str(row.iloc[0]).strip(),
+ "nd": float(row.iloc[1]),
+ "vd": float(row.iloc[3]),
+ "manufacturer": "SCHOTT"
+ }
+ glasses.append(glass)
+ except (ValueError, IndexError, TypeError):
+ continue
+
+ return glasses
+
+def convert_hoya_excel(excel_path):
+ """Convert HOYA Excel catalog to glasses list."""
+ df = pd.read_excel(excel_path, skiprows=3)
+ glasses = []
+
+ for _, row in df.iterrows():
+ if pd.isna(row.iloc[2]): # Glass name in column 2
+ continue
+
+ try:
+ glass = {
+ "name": str(row.iloc[2]).strip(),
+ "nd": float(row.iloc[4]),
+ "vd": float(row.iloc[5]),
+ "manufacturer": "HOYA"
+ }
+ glasses.append(glass)
+ except (ValueError, IndexError, TypeError):
+ continue
+
+ return glasses
+
+def convert_cdgm_excel(excel_path):
+ """Convert CDGM Excel catalog to glasses list."""
+ df = pd.read_excel(excel_path, skiprows=1)
+ glasses = []
+
+ for _, row in df.iterrows():
+ if pd.isna(row.iloc[0]) or row.iloc[0] == '':
+ continue
+
+ try:
+ # Glass name in col 0, nd at col 13 (587.56 nm), vd at col 23
+ glass = {
+ "name": str(row.iloc[0]).strip(),
+ "nd": float(row.iloc[13]), # n at 587.56 nm (d-line)
+ "vd": float(row.iloc[23]), # Abbe number
+ "manufacturer": "CDGM"
+ }
+ glasses.append(glass)
+ except (ValueError, IndexError, TypeError):
+ continue
+
+ return glasses
+
+def convert_ohara_excel(excel_path):
+ """Convert Ohara Excel catalog to glasses list."""
+ df = pd.read_excel(excel_path, skiprows=1)
+ glasses = []
+
+ for _, row in df.iterrows():
+ if pd.isna(row.iloc[1]) or row.iloc[1] == '': # Glass name in col 1
+ continue
+
+ try:
+ glass = {
+ "name": str(row.iloc[1]).strip(),
+ "nd": float(row.iloc[16]), # nd column
+ "vd": float(row.iloc[24]), # vd column (first valid Abbe number)
+ "manufacturer": "Ohara"
+ }
+ glasses.append(glass)
+ except (ValueError, IndexError, TypeError):
+ continue
+
+ return glasses
+
+def detect_manufacturer(excel_path):
+ """Detect manufacturer from filename."""
+ filename = Path(excel_path).name.lower()
+
+ if 'schott' in filename:
+ return 'schott'
+ elif 'hoya' in filename:
+ return 'hoya'
+ elif 'cdgm' in filename:
+ return 'cdgm'
+ elif 'ohara' in filename:
+ return 'ohara'
+ else:
+ return 'unknown'
+
+def process_excel_files(input_path, output_file="glasses.json"):
+ """Process Excel file(s) and output single JSON."""
+
+ input_path = Path(input_path)
+ if not input_path.exists():
+ print(f"Error: Path {input_path} not found")
+ sys.exit(1)
+
+ # Determine if input is file or directory
+ if input_path.is_file():
+ if input_path.suffix.lower() not in ['.xlsx', '.xls']:
+ print(f"Error: {input_path} is not an Excel file")
+ sys.exit(1)
+ excel_files = [input_path]
+ print(f"Processing single Excel file: {input_path.name}")
+ elif input_path.is_dir():
+ # Find all Excel files in directory
+ excel_files = list(input_path.glob("*.xlsx")) + list(input_path.glob("*.xls"))
+ if not excel_files:
+ print(f"No Excel files found in directory {input_path}")
+ sys.exit(1)
+ print(f"Processing directory: {input_path}")
+ print(f"Found {len(excel_files)} Excel file(s)")
+ else:
+ print(f"Error: {input_path} is neither a file nor a directory")
+ sys.exit(1)
+
+ all_glasses = []
+ total_count = 0
+
+ for excel_file in excel_files:
+ manufacturer = detect_manufacturer(excel_file)
+ print(f"Processing {manufacturer.upper()}: {excel_file.name}")
+
+ try:
+ if manufacturer == 'schott':
+ glasses = convert_schott_excel(excel_file)
+ elif manufacturer == 'hoya':
+ glasses = convert_hoya_excel(excel_file)
+ elif manufacturer == 'cdgm':
+ glasses = convert_cdgm_excel(excel_file)
+ elif manufacturer == 'ohara':
+ glasses = convert_ohara_excel(excel_file)
+ else:
+ print(f" Warning: Unknown manufacturer for {excel_file.name}, skipping")
+ continue
+
+ print(f" Extracted {len(glasses)} glasses")
+ all_glasses.extend(glasses)
+ total_count += len(glasses)
+
+ except Exception as e:
+ print(f" Error processing {excel_file.name}: {e}")
+ continue
+
+ # Group glasses by manufacturer
+ manufacturers_data = {}
+ for glass in all_glasses:
+ manufacturer = glass["manufacturer"]
+ if manufacturer not in manufacturers_data:
+ manufacturers_data[manufacturer] = {
+ "glass_count": 0,
+ "glasses": []
+ }
+
+ # Remove manufacturer field from individual glass since it's now the key
+ glass_data = {k: v for k, v in glass.items() if k != "manufacturer"}
+ manufacturers_data[manufacturer]["glasses"].append(glass_data)
+ manufacturers_data[manufacturer]["glass_count"] += 1
+
+ # Create output structure with nested manufacturers
+ output_data = {
+ "format_version": "1.0",
+ "generated_date": pd.Timestamp.now().isoformat(),
+ "total_glasses": total_count,
+ "manufacturers": manufacturers_data
+ }
+
+ # Ensure output directory exists
+ output_path = Path(output_file)
+ output_path.parent.mkdir(exist_ok=True)
+
+ # Write JSON
+ with open(output_path, 'w') as f:
+ json.dump(output_data, f, indent=2, ensure_ascii=False)
+
+ print(f"\nSummary:")
+ print(f" Total glasses: {total_count}")
+ print(f" Manufacturers: {', '.join(manufacturers_data.keys())}")
+ for mfg, data in manufacturers_data.items():
+ print(f" {mfg}: {data['glass_count']} glasses")
+ print(f" Output file: {output_path}")
+
+def main():
+ """Main function with argument parsing."""
+ import argparse
+
+ parser = argparse.ArgumentParser(
+ description='Convert optical glass Excel catalogs to JSON format',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog="""
+Examples:
+ # Process all Excel files in a directory
+ python excel_to_json.py data/Excel/
+
+ # Process a single Excel file
+ python excel_to_json.py data/Excel/HOYA20210421.xlsx
+
+ # Specify output file
+ python excel_to_json.py data/Excel/ -o my_glasses.json
+ """
+ )
+
+ parser.add_argument('input',
+ help='Input Excel file or directory containing Excel files')
+ parser.add_argument('-o', '--output',
+ default='glasses.json',
+ help='Output JSON file (default: glasses.json in current directory)')
+
+ args = parser.parse_args()
+
+ process_excel_files(args.input, args.output)
+
+if __name__ == "__main__":
+ main() \ No newline at end of file
Back to https://optics-design.com