from re import sub, IGNORECASE, findall
from requests import Session
from bs4 import BeautifulSoup
import asyncio
import random
import logging


def _parse_price(price_text):
    """Strip currency symbols, separators and whitespace from *price_text*.

    Returns the price as a float, or None when no digits remain after
    cleaning (e.g. an empty or purely symbolic string).
    """
    cleaned = sub(r"[₹,$\s]", "", price_text, flags=IGNORECASE)
    cleaned = sub(r"[^\d.]", "", cleaned)  # keep only digits and dots
    return float(cleaned) if cleaned else None


def _clean_page_title(title_text):
    """Trim Flipkart marketing boilerplate from a page <title> string.

    Removes a leading "Buy " and everything from " online" / " at best"
    onward, returning the stripped bare product name.
    """
    cleaned = title_text.replace("Buy ", "").split(" online")[0].split(" at best")[0]
    return cleaned.strip()


async def scrape_flipkart_price_and_name(url):
    """Scrape both the Flipkart product price and name in a single request.

    Fetching both values from one page load halves the request count,
    which helps avoid Flipkart's rate limiting.

    Args:
        url: Flipkart product page URL.

    Returns:
        A ``(product_name, price)`` tuple. Either element may be None when
        it could not be extracted; ``(None, None)`` on any request failure.

    NOTE(review): ``requests`` is a blocking HTTP client, so the event loop
    is blocked while each request is in flight; only the sleeps yield
    control. Consider aiohttp if true concurrency is ever needed.
    """
    # Rotate user agents so repeated calls look less like a single bot.
    user_agents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0',
    ]

    session = Session()
    # Browser-like headers: Flipkart serves 403s to obviously scripted clients.
    session.headers.update({
        'User-Agent': random.choice(user_agents),
        'Accept-Language': 'en-US,en;q=0.9,hi;q=0.8',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': 'https://www.flipkart.com/',
        'Origin': 'https://www.flipkart.com',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache',
    })

    try:
        # Random delay to avoid tripping rate limits on back-to-back calls.
        await asyncio.sleep(random.uniform(2, 5))

        # Visit the homepage first so the session picks up Flipkart cookies.
        session.get('https://www.flipkart.com/', timeout=10)
        await asyncio.sleep(random.uniform(1, 2))

        # Now fetch the actual product page.
        data = session.get(url, timeout=15)
        if data.status_code != 200:
            logging.error(f"Error fetching URL: {url}, status code: {data.status_code}")
            if data.status_code == 403:
                logging.error("403 Forbidden - Consider using a VPN or proxy service")
            return None, None

        soup = BeautifulSoup(data.content, 'lxml')

        # --- Price extraction -------------------------------------------
        # Flipkart rotates its CSS class names; try known selectors in order.
        price_selectors = [
            "div._30jeq3._16Jk6d",
            "div._30jeq3",
            "div._16Jk6d",
            "span._30jeq3._16Jk6d",
            "span._30jeq3",
            "div.Nx9bqj.CxhGGd",
            "div._30jeq3.CgYfOw.j-ONb3",
        ]
        price = None
        pricediv = None
        for selector in price_selectors:
            pricediv = soup.select_one(selector)
            if pricediv and pricediv.get_text(strip=True):
                break
        if pricediv and pricediv.get_text(strip=True):
            price_text = pricediv.get_text(strip=True)
            logging.info(f"Found price text: {price_text}")
            price = _parse_price(price_text)
            if price is not None:
                logging.info(f"Successfully extracted price: Rs.{price}")

        # --- Product-name extraction ------------------------------------
        title_selectors = [
            "span.VU-ZEz",                    # newer Flipkart design
            "span.B_NuCI",
            "h1.x-product-title-label",
            "h1._35KyD6",
            "span._35KyD6",
            "h1.yhZ71d",
            "span.yhZ71d",
            "span.VU-ZEz._36FX1L",            # another common selector
            "h1.Wphh3N",                      # another possible title selector
            "[data-testid='product-title']",  # data-attribute selector
        ]
        product_name = None
        for selector in title_selectors:
            title_elem = soup.select_one(selector)
            if title_elem and title_elem.get_text(strip=True):
                product_name = title_elem.get_text(strip=True)
                logging.info(f"Found title with selector '{selector}': {product_name[:50]}...")
                break

        # Fall back to the page <title>, minus its marketing boilerplate.
        if not product_name:
            title = soup.find('title')
            if title:
                title_text = title.get_text()
                logging.info(f"Using page title: {title_text}")
                product_name = _clean_page_title(title_text)

        return product_name, price
    except Exception as e:
        logging.error(f"Exception during request to {url}: {e}")
        return None, None
    finally:
        # Always release the connection pool, even on error paths.
        session.close()


async def scrape_flipkart_price(url):
    """Scrape only the Flipkart product price.

    Thin wrapper around scrape_flipkart_price_and_name; returns the price
    as a float, or None if it could not be extracted.
    """
    _, price = await scrape_flipkart_price_and_name(url)
    return price


def extract_flipkart_product_name(url):
    """Synchronously extract the product name from a Flipkart URL.

    Drives the combined async scraper on a fresh event loop via
    asyncio.run, which (unlike the previous manual new_event_loop /
    run_until_complete / close sequence) guarantees the loop is closed
    even when the coroutine raises. Must not be called from inside an
    already-running event loop.

    Returns the product name, or None on any failure.
    """
    try:
        product_name, _ = asyncio.run(scrape_flipkart_price_and_name(url))
        return product_name
    except Exception as e:
        logging.error(f"Error extracting product name: {e}")
        return None