import pandas as pd
import json
import os

# The workbook is looked up in the current working directory. os.path.join
# supplies the path separator that plain string concatenation would omit
# (which would otherwise glue the filename onto the directory name).
file_path = os.path.join(os.getcwd(), 'PLU+FSMA+list+v1.0.xlsx')
# Worksheet to read from the workbook.
sheet_name = 'Non FTL'
# Base URL that the image filenames taken from the sheet are appended to.
new_url_base = "https://server-ifps.accurateig.com/assets/commodities/"

def _format_plu(value):
    """Return a PLU cell as text without a spurious trailing '.0'.

    pandas stores an integer column that contains blank cells as floats, so
    a code such as 4011 arrives as 4011.0. Whole-number floats are converted
    back to integers before being rendered; any other value is passed
    through ``str`` unchanged.
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def get_one_of_each():
    """Write one entry per commodity from the Excel sheet to a JSON file.

    Reads the sheet ``sheet_name`` from ``file_path``, discards rows that
    lack an IMAGE, PLU or COMMODITY value, keeps the first remaining row for
    each COMMODITY, and writes a list of ``{"name", "plu", "image"}``
    objects to ``one_of_each.json`` in the current working directory.

    Returns:
        None. If the workbook does not exist a message is printed and the
        function returns without writing anything.
    """
    if not os.path.exists(file_path):
        print("❌ Excel file not found.")
        return

    # 1. Load Excel
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    # 2. Drop rows missing the essentials
    df = df.dropna(subset=['IMAGE', 'PLU', 'COMMODITY'])

    # 3. Deduplicate on COMMODITY only, so other columns are ignored and
    #    exactly one row (the first encountered) survives per commodity.
    df_unique = df.drop_duplicates(subset=['COMMODITY'], keep='first')

    data_output = []
    columns = zip(df_unique['COMMODITY'], df_unique['PLU'], df_unique['IMAGE'])
    for commodity, plu, image in columns:
        # Only the last path segment of the spreadsheet link is reused;
        # surrounding whitespace is stripped so it cannot leak into the URL.
        filename = str(image).strip().split('/')[-1]

        data_output.append({
            "name": str(commodity).title(),
            "plu": _format_plu(plu),
            "image": f"{new_url_base}{filename}",
        })

    # 4. Save to JSON (ensure_ascii=False keeps non-ASCII names readable)
    with open('one_of_each.json', 'w', encoding='utf-8') as f:
        json.dump(data_output, f, indent=4, ensure_ascii=False)

    print(f"✅ Success! Generated 'one_of_each.json' with {len(data_output)} unique commodities.")

# Run the export only when this file is executed directly, not when imported.
if __name__ == "__main__":
    get_one_of_each()