import streamlit as st import pandas as pd import plotly.express as px import plotly.io as pio import plotly.graph_objects as go import re from collections import Counter import streamlit.components.v1 as components import smtplib from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart import nltk from nltk.stem import WordNetLemmatizer import ssl # --- LEVEL 2 AI SETUP (NATURAL LANGUAGE PROCESSING) --- try: _create_unverified_https_context = ssl._create_unverified_context except AttributeError: pass else: ssl._create_default_https_context = _create_unverified_https_context @st.cache_resource def setup_nlp(): """Initializes Level 2 AI Lemmatizer to understand root words (e.g., automating -> automation)""" try: nltk.download('wordnet', quiet=True) nltk.download('omw-1.4', quiet=True) except: pass return WordNetLemmatizer() lemmatizer = setup_nlp() # --- TABLEAU THEME SETUP FOR PLOTLY --- pio.templates.default = "plotly_white" custom_color_scale = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"] # --- PAGE CONFIG --- st.set_page_config(page_title="Synergi USM-SME Portal", layout="wide", page_icon="🚀") # --- CUSTOM ENTERPRISE CSS (TABLEAU-LIKE DASHBOARD) --- st.markdown(""" """, unsafe_allow_html=True) # --- DIRECT EMAIL SENDER HELPER --- def send_direct_email(sender_email, sender_password, recipient_email, subject, body): try: msg = MIMEMultipart() msg['From'] = sender_email msg['To'] = recipient_email msg['Subject'] = subject msg.attach(MIMEText(body, 'plain')) server = smtplib.SMTP_SSL('smtp.gmail.com', 465) server.login(sender_email, sender_password) server.send_message(msg) server.quit() return True, "Email dispatched successfully!" except smtplib.SMTPAuthenticationError: return False, "Authentication Failed: Check your email and ensure you are using an 'App Password'." except Exception as e: return False, f"Delivery Failed: {str(e)}" # --- BI-LINGUAL COLUMN EXTRACTOR HELPER --- def find_column(df, keywords): for col in df.columns: col_lower = col.lower() if any(kw in col_lower for kw in keywords): return col return None # --- DATA FETCHING & ROBUST CLEANING --- @st.cache_data(ttl=60) def load_and_clean_data(): sme_url = "https://docs.google.com/spreadsheets/d/1aDcJq-BlOMBNmAVb3p325SA92hIoEE08R2zIA5T3aPQ/export?format=csv&gid=1649886041" usm_url = "https://docs.google.com/spreadsheets/d/1aDcJq-BlOMBNmAVb3p325SA92hIoEE08R2zIA5T3aPQ/export?format=csv&gid=841945233" try: sme_df = pd.read_csv(sme_url) usm_df = pd.read_csv(usm_url) if '' in str(sme_df.columns[0]).lower(): st.error("Google Sheets returned an HTML page. Please ensure link sharing is set to 'Anyone with the link can view'.") return pd.DataFrame(), pd.DataFrame(), pd.DataFrame() sme_df = sme_df.fillna("N/A") usm_df = usm_df.fillna("N/A") except Exception as e: st.error(f"Failed to fetch data: {str(e)}") return pd.DataFrame(), pd.DataFrame(), pd.DataFrame() try: leads_df = pd.read_csv("investpenang_scraped_leads.csv") leads_df = leads_df.fillna("N/A") leads_df.columns = leads_df.columns.str.strip() except: leads_df = pd.DataFrame() if not sme_df.empty: sme_df.columns = sme_df.columns.str.strip() size_col = find_column(sme_df, ["company size", "saiz syarikat"]) interest_col = find_column(sme_df, ["innovation day", "hari inovasi", "berminat untuk", "interested to"]) readiness_col = find_column(sme_df, ["readiness for formal collaboration", "kesediaan untuk kolaborasi formal"]) funding_col = find_column(sme_df, ["budget or funding", "bajet atau pembiayaan"]) chal_col = find_column(sme_df, ["main challenge", "cabaran utama", "masalah pks yang ditangani"]) elab_col = find_column(sme_df, ["elaborate on the challenge", "huraikan cabaran tersebut"]) # Create standardized Size column if size_col: sme_df['Size_Trimmed'] = sme_df[size_col].apply(lambda x: str(x).split(',')[0].split(':')[0].split('-')[0].strip()) sme_df['Size_Trimmed'] = sme_df['Size_Trimmed'].replace({"N/A": "Unknown"}) def trim_readiness(row): val = (str(row.get(interest_col, "")) + " " + str(row.get(readiness_col, ""))).lower() if 'moa' in val: return 'Ready for MoA' elif 'mou' in val: return 'Ready for MoU' elif 'match' in val or 'pemadanan perniagaan' in val or 'padan' in val: return 'Business Matching' elif 'grant' in val or 'geran' in val or 'pembiayaan' in val: return 'Seeking Grants' elif 'network' in val or 'rangkaian' in val: return 'Networking Only' else: return 'Exploratory' sme_df['Readiness_Trimmed'] = sme_df.apply(trim_readiness, axis=1) if funding_col: def trim_funding(x): x = str(x).lower() if 'available' in x or 'ada' in x or 'tersedia' in x: return 'Budget Available' elif 'grant' in x or 'geran' in x or 'dana' in x: return 'Needs Grant' elif 'sharing' in x or 'kongsi' in x or 'bersama' in x: return 'Cost-Sharing' else: return 'To Be Discussed' sme_df['Funding_Trimmed'] = sme_df[funding_col].apply(trim_funding) if chal_col and elab_col: sme_df['LLM_Context'] = sme_df[chal_col].astype(str) + " " + sme_df[elab_col].astype(str).replace("N/A", "") elif chal_col: sme_df['LLM_Context'] = sme_df[chal_col].astype(str) if not usm_df.empty: usm_df.columns = usm_df.columns.str.strip() return sme_df, usm_df, leads_df sme_df, usm_df, leads_df = load_and_clean_data() SME_NAME_COL = find_column(sme_df, ["company name", "nama syarikat"]) SME_SECTOR_COL = find_column(sme_df, ["business sector", "sektor perniagaan"]) SME_CHAL_COL = find_column(sme_df, ["main challenge", "cabaran utama"]) SME_SIZE_COL = "Size_Trimmed" USM_PTJ_COL = find_column(usm_df, ["ptj", "nama ptj", "pusat pengajian"]) USM_EXP_COL = find_column(usm_df, ["expertise cluster", "kluster kepakaran"]) USM_KEYWORD_COL = find_column(usm_df, ["keywords", "kata kunci"]) USM_DESC_COL = find_column(usm_df, ["briefly describe", "terangkan secara ringkas"]) # --- SIDEBAR --- with st.sidebar: st.image("https://upload.wikimedia.org/wikipedia/en/thumb/b/b5/Universiti_Sains_Malaysia_logo.svg/1200px-Universiti_Sains_Malaysia_logo.svg.png", width=150) st.title("⚙️ Executive Navigation") view_mode = st.radio("Platform Modules", [ "📈 Ecosystem Investment Synthesis", "📇 Industry Opportunity Directory", "🧠 AI Matchmaking Engine", "📝 Submit Profile", "🔒 Admin Suite" ]) # ========================================== # VIEW 1: ANALYTICAL SYNTHESIS (THE ULTIMATE COMMAND CENTER) # ========================================== if view_mode == "📈 Ecosystem Investment Synthesis": st.title("📈 Synergi USM-SME") st.markdown("A macro-level spatial dashboard designed to visualize capital readiness, highlight investment hotspots, and map USM's strategic value to industry partners.") if not sme_df.empty: # --- GLOBAL INTERACTIVE FILTERS --- with st.container(): st.markdown("### 🎛️ Strategic Data Filters") f1, f2, f3, f4 = st.columns(4) dash_filtered_sme = sme_df.copy() with f1: dash_sector = st.multiselect("Industry Sector", sorted(dash_filtered_sme[SME_SECTOR_COL].unique()) if SME_SECTOR_COL else []) with f2: dash_intent = st.multiselect("Collaboration Readiness", dash_filtered_sme['Readiness_Trimmed'].unique()) with f3: dash_funding = st.multiselect("Capital / Funding", dash_filtered_sme['Funding_Trimmed'].unique()) with f4: if SME_SIZE_COL in dash_filtered_sme.columns: dash_size = st.multiselect("Enterprise Scale", dash_filtered_sme[SME_SIZE_COL].unique()) else: dash_size = [] if dash_sector: dash_filtered_sme = dash_filtered_sme[dash_filtered_sme[SME_SECTOR_COL].isin(dash_sector)] if dash_intent: dash_filtered_sme = dash_filtered_sme[dash_filtered_sme['Readiness_Trimmed'].isin(dash_intent)] if dash_funding: dash_filtered_sme = dash_filtered_sme[dash_filtered_sme['Funding_Trimmed'].isin(dash_funding)] if dash_size: dash_filtered_sme = dash_filtered_sme[dash_filtered_sme[SME_SIZE_COL].isin(dash_size)] st.markdown("
", unsafe_allow_html=True) # --- EXECUTIVE KPI METRICS --- total_sme_val = len(dash_filtered_sme) high_readiness_val = len(dash_filtered_sme[dash_filtered_sme['Readiness_Trimmed'].isin(['Ready for MoA', 'Ready for MoU', 'Business Matching'])]) capital_ready_val = len(dash_filtered_sme[dash_filtered_sme['Funding_Trimmed'].isin(['Budget Available', 'Cost-Sharing'])]) total_leads_val = len(leads_df) if not leads_df.empty else 0 st.markdown(f"""

{total_sme_val}

Inbound SME Profiles

{high_readiness_val}

High-Readiness Partners

{capital_ready_val}

Capital-Ready (Budgets)

{total_leads_val}

Market Expansion Leads

""", unsafe_allow_html=True) # --- DYNAMIC ROI VALUE PROPOSITION --- st.markdown(f"""

💡 Why Invest & Collaborate with USM Today?

Based on the current filtered ecosystem view, there are exactly {high_readiness_val} enterprises primed for immediate formal collaboration (MoU/MoA/Matching). Furthermore, {capital_ready_val} entities have indicated existing internal capital ready to be deployed for joint solutions. By integrating your resources with USM's {len(usm_df)} academic experts, investors gain direct, de-risked access to these vetted industry pipelines.

""", unsafe_allow_html=True) # --- SPATIAL CHARTS: ROW 1 (HIERARCHY & CAPITAL) --- c1, c2 = st.columns([1.2, 1]) with c1: st.markdown("#### 🧭 Ecosystem Capability Hierarchy") st.caption("Interactive Treemap: Click a Sector block to zoom into Collaboration & Funding pipelines.") if not dash_filtered_sme.empty and SME_SECTOR_COL: df_tree = dash_filtered_sme.copy() df_tree[SME_SECTOR_COL] = df_tree[SME_SECTOR_COL].astype(str) # Replaced Sunburst with Treemap for vastly superior text readability fig_tree_inbound = px.treemap( df_tree, path=[px.Constant("USM-SME Ecosystem"), SME_SECTOR_COL, 'Readiness_Trimmed', 'Funding_Trimmed'], color='Readiness_Trimmed', color_discrete_sequence=custom_color_scale ) fig_tree_inbound.update_traces( textinfo="label+value", textfont=dict(size=13, weight="bold") ) fig_tree_inbound.update_layout( margin=dict(t=10, l=10, r=10, b=10), paper_bgcolor='rgba(0,0,0,0)', height=420 ) st.plotly_chart(fig_tree_inbound, use_container_width=True) else: st.info("Insufficient data.") with c2: st.markdown("#### 💰 Capital Landscape Matrix") st.caption("Identifies which industry sectors possess liquid budgets versus those requiring USM grant facilitation.") if not dash_filtered_sme.empty and SME_SECTOR_COL: capital_df = pd.crosstab(dash_filtered_sme[SME_SECTOR_COL], dash_filtered_sme['Funding_Trimmed']).reset_index() fig_cap = px.bar( capital_df, x=SME_SECTOR_COL, y=[col for col in capital_df.columns if col != SME_SECTOR_COL], barmode='stack', color_discrete_sequence=["#2ca02c", "#fca311", "#1f77b4", "#d62728"] ) fig_cap.update_layout( xaxis_title="", yaxis_title="Number of SMEs", legend_title="Funding Status", legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), margin=dict(t=10, l=10, r=10, b=10), paper_bgcolor='rgba(0,0,0,0)', height=420 ) fig_cap.update_xaxes(tickangle=45) st.plotly_chart(fig_cap, use_container_width=True) st.markdown("

", unsafe_allow_html=True) # --- SPATIAL CHARTS: ROW 2 (NLP THEMATIC MAPPING) --- c3, c4 = st.columns([1, 1.2]) # NLP Pre-Processing for Row 2 nlp_df = dash_filtered_sme.copy() theme_counts = {"Automation & Industry 4.0": 0, "Supply Chain & Operations": 0, "Financial & Cost": 0, "Talent & Workforce": 0, "R&D & Innovation": 0} all_lemmas = [] if 'LLM_Context' in nlp_df.columns and not nlp_df.empty: text_data = " ".join(nlp_df['LLM_Context'].dropna()).lower() if text_data.strip() and text_data.strip() != "n/a": words = re.findall(r'\b[a-z]{4,}\b', text_data) eng_stopwords = {"with", "that", "this", "from", "have", "need", "currently", "faced", "company", "please", "briefly", "elaborate", "above", "selected", "what", "main", "issue", "cost", "adoption", "testing", "the", "and", "for", "are", "our", "their"} malay_stopwords = {"dan", "yang", "untuk", "dengan", "daripada", "kepada", "kami", "syarikat", "dalam", "pada", "adalah", "sebagai", "tidak", "akan", "boleh", "oleh", "juga", "atau", "cabaran", "utama", "masalah", "ini", "itu", "ada", "telah", "sila", "huraikan", "tersebut", "apakah"} stopwords = eng_stopwords.union(malay_stopwords) # Theme Dictionary (Level 2 AI Mapping) theme_dict = { "Automation & Industry 4.0": ["automation", "robot", "digital", "iot", "system", "technology", "software", "machine", "automasi", "sistem", "digital", "mesin", "teknologi"], "Supply Chain & Operations": ["supply", "chain", "logistics", "vendor", "material", "production", "bekalan", "logistik", "bahan", "pengeluaran", "operasi"], "Financial & Cost": ["cost", "fund", "grant", "budget", "finance", "price", "kos", "dana", "kewangan", "bajet", "geran", "pembiayaan", "harga"], "Talent & Workforce": ["talent", "worker", "skill", "training", "staff", "pekerja", "kemahiran", "latihan", "bakat", "staf"], "R&D & Innovation": ["research", "develop", "product", "innovate", "test", "design", "penyelidikan", "inovasi", "produk", "uji", "reka"] } for w in words: if w not in stopwords: try: root_word = lemmatizer.lemmatize(w) except: root_word = w all_lemmas.append(root_word) # Score themes for theme, triggers in theme_dict.items(): if root_word in triggers or w in triggers: theme_counts[theme] += 1 with c3: st.markdown("#### 🎯 NLP Strategic Investment Themes") st.caption("Radar Chart: AI groups extracted text into 5 core industrial demand zones.") if sum(theme_counts.values()) > 0: radar_df = pd.DataFrame({ 'Theme': list(theme_counts.keys()), 'Intensity Score': list(theme_counts.values()) }) fig_radar = px.line_polar( radar_df, r='Intensity Score', theta='Theme', line_close=True, color_discrete_sequence=["#ff7f0e"] ) fig_radar.update_traces(fill='toself', fillcolor='rgba(255, 127, 14, 0.4)') fig_radar.update_layout( polar=dict(radialaxis=dict(visible=True, showticklabels=False)), margin=dict(t=30, b=30, l=30, r=30), paper_bgcolor='rgba(0,0,0,0)', height=380 ) st.plotly_chart(fig_radar, use_container_width=True) else: st.info("Insufficient text data to generate AI Theme Radar.") with c4: st.markdown("#### 🫧 Granular Pain-Point Matrix") st.caption("Bubble Spatial Plot: Frequency distribution of specific lemmatized keywords.") if all_lemmas: word_counts = Counter(all_lemmas).most_common(20) bubble_df = pd.DataFrame(word_counts, columns=['Keyword', 'Frequency']) # Create a spatial scatter plot to act as a Bubble Chart fig_bubble = px.scatter( bubble_df, x='Keyword', y='Frequency', size='Frequency', color='Frequency', color_continuous_scale='Teal', size_max=45 ) fig_bubble.update_layout( xaxis_title="", yaxis_title="Mention Frequency", margin=dict(t=10, b=10, l=10, r=10), paper_bgcolor='rgba(0,0,0,0)', height=380 ) fig_bubble.update_xaxes(tickangle=45) st.plotly_chart(fig_bubble, use_container_width=True) else: st.info("Insufficient text data for Granular Matrix.") st.markdown("

", unsafe_allow_html=True) # --- SPATIAL CHARTS: ROW 3 (MARKET EXPANSION & ENTERPRISE SCALE) --- c5, c6 = st.columns([1.2, 1]) with c5: st.markdown("#### 🗺️ Scraped Outbound Market Density") st.caption("Treemap displaying spatial density of external regional targets scraped from InvestPenang.") if not leads_df.empty: lead_agg = leads_df.groupby('Business Sector').size().reset_index(name='Target Volume') lead_agg['Ecosystem'] = 'External Market Expansion' fig_tree = px.treemap( lead_agg, path=['Ecosystem', 'Business Sector'], values='Target Volume', color='Target Volume', color_continuous_scale='Blues' ) fig_tree.update_traces(textinfo="label+value") fig_tree.update_layout(margin=dict(t=10, l=10, r=10, b=10), paper_bgcolor='rgba(0,0,0,0)', height=380) st.plotly_chart(fig_tree, use_container_width=True) else: st.info("Upload 'investpenang_scraped_leads.csv' to render spatial market density.") with c6: st.markdown("#### 🏢 Inbound Enterprise Scale") st.caption("Donut Chart: Distribution of SME operational sizes within the current filtered ecosystem.") if SME_SIZE_COL in dash_filtered_sme.columns and not dash_filtered_sme.empty: size_df = dash_filtered_sme[SME_SIZE_COL].value_counts().reset_index() size_df.columns = ['Scale', 'Count'] fig_donut = px.pie( size_df, names='Scale', values='Count', hole=0.55, color_discrete_sequence=px.colors.sequential.Agsunset ) fig_donut.update_traces(textposition='inside', textinfo='percent+label') fig_donut.update_layout(margin=dict(t=10, b=10, l=10, r=10), showlegend=False, paper_bgcolor='rgba(0,0,0,0)', height=380) st.plotly_chart(fig_donut, use_container_width=True) else: st.info("No enterprise scale data available.") # ========================================== # VIEW 2: REACTIVE DIRECTORY # ========================================== elif view_mode == "📇 Industry Opportunity Directory": st.title("📇 Industry Opportunity Directory") pipeline_type = st.radio("Select Target Pipeline Source Segment:", [ "🔴 Inbound Proposals (Active Form Submissions)", "🔵 Outbound Market Targets (Scraped InvestPenang LLC-SME Directory)" ], horizontal=True) if "Inbound Proposals" in pipeline_type: st.markdown("Use the filters below to slice the data. The table updates instantly.") if not sme_df.empty: search_query = st.text_input("🔍 Search Keyword (Company, Challenge, Keyword)...", "") f1, f2, f3 = st.columns(3) with f1: selected_sector = st.multiselect("Filter by Sector", sme_df[SME_SECTOR_COL].unique() if SME_SECTOR_COL else []) with f2: if 'Readiness_Trimmed' in sme_df.columns: selected_readiness = st.multiselect("Filter by Collaboration Intent", sme_df['Readiness_Trimmed'].unique()) else: selected_readiness = [] with f3: if 'Funding_Trimmed' in sme_df.columns: selected_funding = st.multiselect("Filter by Funding Status", sme_df['Funding_Trimmed'].unique()) else: selected_funding = [] filtered_sme = sme_df.copy() if search_query: search_query_lower = search_query.lower() filtered_sme = filtered_sme[filtered_sme.apply(lambda row: row.astype(str).str.lower().str.contains(search_query_lower).any(), axis=1)] if selected_sector: filtered_sme = filtered_sme[filtered_sme[SME_SECTOR_COL].isin(selected_sector)] if selected_readiness: filtered_sme = filtered_sme[filtered_sme['Readiness_Trimmed'].isin(selected_readiness)] if selected_funding: filtered_sme = filtered_sme[filtered_sme['Funding_Trimmed'].isin(selected_funding)] st.caption(f"Found **{len(filtered_sme)}** profiles.") display_cols = [SME_NAME_COL, SME_SECTOR_COL, 'Size_Trimmed', SME_CHAL_COL, 'Readiness_Trimmed', 'Funding_Trimmed'] display_cols = [c for c in display_cols if c in filtered_sme.columns] st.dataframe( filtered_sme[display_cols], use_container_width=True, hide_index=True, column_config={ SME_NAME_COL: st.column_config.TextColumn("Company Name", width="medium"), SME_SECTOR_COL: st.column_config.TextColumn("Sector", width="medium"), 'Size_Trimmed': st.column_config.TextColumn("Size", width="small"), SME_CHAL_COL: st.column_config.TextColumn("Core Challenge", width="large"), 'Readiness_Trimmed': st.column_config.TextColumn("Collaboration Intent", width="medium"), } ) else: st.markdown("Browsing market validation targets extracted from official regional channels.") if not leads_df.empty: search_query = st.text_input("🔍 Search Scraped Corporate Profiles...", "") filtered_leads = leads_df.copy() if search_query: search_query_lower = search_query.lower() filtered_leads = filtered_leads[filtered_leads.apply(lambda row: row.astype(str).str.lower().str.contains(search_query_lower).any(), axis=1)] st.dataframe(filtered_leads, use_container_width=True, hide_index=True) else: st.info("No scraped market files detected. Please drop 'investpenang_scraped_leads.csv' inside your repository directory layout.") # ========================================== # VIEW 3: MULTI-DIRECTIONAL AI MATCHMAKING # ========================================== elif view_mode == "🧠 AI Matchmaking Engine": st.title("🧠 Multi-Directional AI Matchmaking Engine") st.markdown("Perform bi-directional Level-2 NLP overlap matching between Industry pain points and Academic expertise.") if not sme_df.empty and not usm_df.empty: match_type = st.radio("Select Match Direction:", [ "🏢 SME ➡️ USM (Find Experts for a Company)", "🎓 USM ➡️ SME (Find Companies for an Expert)", "🔍 Custom Keyword Discovery", "🎯 Scraped Corporate Leads (Intelligent Semantic Outreach)" ], horizontal=True) st.markdown("---") if match_type == "🏢 SME ➡️ USM (Find Experts for a Company)": col1, col2 = st.columns(2) with col1: sme_list = sme_df[SME_NAME_COL].dropna().unique() if SME_NAME_COL else [] selected_sme = st.selectbox("1. Select Target SME:", sme_list) with col2: usm_clusters = usm_df[USM_EXP_COL].dropna().unique() if USM_EXP_COL else [] filter_cluster = st.multiselect("2. (Optional) Filter Results by USM Expertise Cluster:", usm_clusters) if selected_sme: sme_data = sme_df[sme_df[SME_NAME_COL] == selected_sme].iloc[0] sme_context = str(sme_data.get('LLM_Context', '')).lower() st.info(f"**Target SME Challenge:** {sme_data.get(SME_CHAL_COL, 'N/A')}") sme_words = re.findall(r'\b\w+\b', sme_context) sme_lemmas = set([lemmatizer.lemmatize(w) for w in sme_words]) usm_pool = usm_df.copy() if filter_cluster: usm_pool = usm_pool[usm_pool[USM_EXP_COL].isin(filter_cluster)] matches = [] for idx, usm_row in usm_pool.iterrows(): usm_keywords_raw = str(usm_row.get(USM_KEYWORD_COL, '')).lower().replace(",", " ").split() score = 0 for kw in usm_keywords_raw: if len(kw) > 3: kw_lemma = lemmatizer.lemmatize(kw) if kw_lemma in sme_lemmas or kw in sme_context: score += 1 if score > 0: matches.append({ "Match Score": score, "USM PTJ / Expert": usm_row.get(USM_PTJ_COL, "N/A"), "Expertise Cluster": usm_row.get(USM_EXP_COL, "N/A"), "Description": usm_row.get(USM_DESC_COL, "N/A") }) if matches: st.success(f"✅ Found {len(matches)} USM matches based on Level 2 root-word overlap.") match_df = pd.DataFrame(matches).sort_values(by="Match Score", ascending=False) st.dataframe(match_df, use_container_width=True, hide_index=True) else: st.warning("No direct keyword overlaps found. Try removing cluster filters.") elif match_type == "🎓 USM ➡️ SME (Find Companies for an Expert)": col1, col2, col3 = st.columns(3) with col1: usm_list = usm_df[USM_PTJ_COL].dropna().unique() if USM_PTJ_COL else [] selected_usm = st.selectbox("1. Select USM Expert/PTJ:", usm_list) with col2: sme_sectors = sme_df[SME_SECTOR_COL].dropna().unique() if SME_SECTOR_COL else [] filter_sector = st.multiselect("2. (Optional) Filter SMEs by Sector:", sme_sectors) with col3: sme_readiness = sme_df['Readiness_Trimmed'].dropna().unique() if 'Readiness_Trimmed' in sme_df.columns else [] filter_readiness = st.multiselect("3. (Optional) Filter SMEs by Readiness:", sme_readiness) if selected_usm: usm_data = usm_df[usm_df[USM_PTJ_COL] == selected_usm].iloc[0] usm_keywords = str(usm_data.get(USM_KEYWORD_COL, '')).lower().replace(",", " ").split() st.info(f"**USM Target Keywords:** {', '.join(usm_keywords)}") sme_pool = sme_df.copy() if filter_sector: sme_pool = sme_pool[sme_pool[SME_SECTOR_COL].isin(filter_sector)] if filter_readiness: sme_pool = sme_pool[sme_pool['Readiness_Trimmed'].isin(filter_readiness)] usm_lemmas = [lemmatizer.lemmatize(kw) for kw in usm_keywords if len(kw) > 3] matches = [] for idx, sme_row in sme_pool.iterrows(): sme_context = str(sme_row.get('LLM_Context', '')).lower() sme_words = re.findall(r'\b\w+\b', sme_context) sme_lemmas_set = set([lemmatizer.lemmatize(w) for w in sme_words]) score = sum(1 for kw_lemma in usm_lemmas if kw_lemma in sme_lemmas_set) if score == 0: score = sum(1 for kw in usm_keywords if len(kw) > 3 and kw in sme_context) if score > 0: matches.append({ "Match Score": score, "Company Name": sme_row.get(SME_NAME_COL, "N/A"), "Sector": sme_row.get(SME_SECTOR_COL, "N/A"), "Challenge": sme_row.get(SME_CHAL_COL, "N/A"), "Intent": sme_row.get('Readiness_Trimmed', "N/A") }) if matches: st.success(f"✅ Found {len(matches)} SME matches requiring this expertise.") match_df = pd.DataFrame(matches).sort_values(by="Match Score", ascending=False) st.dataframe(match_df, use_container_width=True, hide_index=True) else: st.warning("No SMEs found with overlapping keywords in the selected filters.") elif match_type == "🔍 Custom Keyword Discovery": custom_kw = st.text_input("Type a technology, problem, or skill (e.g., 'automation', 'IoT', 'makanan'):") if custom_kw: custom_kw = custom_kw.lower().strip() kw_lemma = lemmatizer.lemmatize(custom_kw) st.markdown(f"### Results mapped for root concept: **{kw_lemma}**") c1, c2 = st.columns(2) with c1: st.markdown("##### 🏢 SMEs mentioning this:") if 'LLM_Context' in sme_df.columns: sme_hits = sme_df[sme_df['LLM_Context'].str.lower().apply( lambda text: kw_lemma in [lemmatizer.lemmatize(w) for w in re.findall(r'\b\w+\b', text)] or custom_kw in text )] if not sme_hits.empty: display_hits = sme_hits[[SME_NAME_COL, SME_SECTOR_COL, SME_CHAL_COL]].dropna(axis=1) st.dataframe(display_hits, hide_index=True, use_container_width=True) else: st.info("No SMEs found.") with c2: st.markdown("##### 🎓 USM Experts mentioning this:") usm_hits = usm_df[usm_df.astype(str).apply( lambda x: x.str.lower().apply(lambda text: kw_lemma in [lemmatizer.lemmatize(w) for w in re.findall(r'\b\w+\b', text)] or custom_kw in text) ).any(axis=1)] if not usm_hits.empty: display_hits_usm = usm_hits[[USM_PTJ_COL, USM_EXP_COL, USM_DESC_COL]].dropna(axis=1) st.dataframe(display_hits_usm, hide_index=True, use_container_width=True) else: st.info("No USM Experts found.") elif match_type == "🎯 Scraped Corporate Leads (Intelligent Semantic Outreach)": st.markdown("This engine bridges USM Expert Keywords with InvestPenang Sectors using **Level 2 Bi-Lingual Semantic Expansion**.") if not leads_df.empty and not usm_df.empty: usm_list = usm_df[USM_PTJ_COL].dropna().unique() selected_usm = st.selectbox("Select Academic Expert / PTJ:", usm_list) if selected_usm: usm_data = usm_df[usm_df[USM_PTJ_COL] == selected_usm].iloc[0] expert_keywords_raw = str(usm_data.get(USM_KEYWORD_COL, '')).lower().replace(",", " ").split() clean_keywords = [kw.strip() for kw in expert_keywords_raw if len(kw) > 3] st.info(f"**Academic Expertise Signature Keywords:** {', '.join(clean_keywords)}") taxonomy = { "Manufacturing & Automation": { "triggers": ["automation", "robotic", "electronic", "circuit", "pcb", "semiconductor", "machine", "equipment", "system", "control", "ems", "led", "automasi", "robotik", "elektronik", "litar", "mesin", "peralatan", "pembuatan", "sistem", "kawalan"], "sectors": ["Automation Solutions", "Machinery and Equipments", "EMS and Contract Manufacturing", "LED and Lighting", "Electrical"] }, "Materials & Precision Tooling": { "triggers": ["material", "precision", "metal", "rubber", "polymer", "plastic", "ceramic", "coating", "treatment", "moulding", "chemical", "fabrication", "tooling", "bahan", "logam", "getah", "polimer", "plastik", "seramik", "salutan", "kimia", "acuan"], "sectors": ["Precision, Machining and Fabric", "Rubber Products", "Plastic Products", "Surface Treatment and Plating", "Chemical Industry and Advanced Material", "Plastic Injection Moulding"] }, "Agro-Based & Food Sciences": { "triggers": ["food", "nutrition", "agro", "biology", "chemistry", "ingredient", "starch", "packaging", "biotech", "pharma", "makanan", "nutrisi", "pertanian", "biologi", "pembungkusan", "farmasi", "ramuan"], "sectors": ["Food Product and Food Ingredient", "Biotechnology and Pharmaceutical", "Packaging Services and Materials"] } } for cat in taxonomy: taxonomy[cat]["lemmas"] = set([lemmatizer.lemmatize(t) for t in taxonomy[cat]["triggers"]]) lead_matches = [] for idx, lead_row in leads_df.iterrows(): c_name = str(lead_row.get("Company Name", '')).lower() c_sector = str(lead_row.get("Business Sector", '')) c_sector_lower = c_sector.lower() lead_words = re.findall(r'\b\w+\b', c_name + " " + c_sector_lower) lead_lemmas = set([lemmatizer.lemmatize(w) for w in lead_words]) score = 0 matched_tags = [] for kw in clean_keywords: kw_lemma = lemmatizer.lemmatize(kw) if kw in c_name or kw in c_sector_lower or kw_lemma in lead_lemmas: score += 3 matched_tags.append(kw) for category, data in taxonomy.items(): if any(lemmatizer.lemmatize(kw) in data["lemmas"] for kw in clean_keywords): if any(sec.lower() in c_sector_lower for sec in data["sectors"]): score += 5 matched_tags.append(category) if score > 0: matched_tags = list(set(matched_tags)) lead_matches.append({ "Match Conf. Score": score, "Target Company": lead_row.get("Company Name"), "Industry Sector": c_sector, "Alignment Drivers": ", ".join(matched_tags).title(), "Email Destination": lead_row.get("Email", ""), "Contact Person": lead_row.get("Contact Person", "Managing Director"), "Designation": lead_row.get("Designation", "Leadership Team") }) if lead_matches: st.success(f"✅ Found {len(lead_matches)} highly aligned local market targets based on exact and Level-2 semantic matching.") lead_match_df = pd.DataFrame(lead_matches).sort_values(by="Match Conf. Score", ascending=False) m_col1, m_col2 = st.columns([3, 2]) with m_col1: st.markdown("##### High-Probability Matrix Matches") st.dataframe(lead_match_df[["Match Conf. Score", "Target Company", "Industry Sector", "Alignment Drivers"]], use_container_width=True, hide_index=True) with m_col2: st.markdown("##### ✉️ Trigger Smart Outreach Automation") with st.expander("⚙️ Setup Direct Email Sender (Required)"): st.markdown("Provide your email credentials to dispatch emails directly from the server.") sender_email = st.text_input("Your Email Address", placeholder="e.g., yourname@gmail.com") sender_pass = st.text_input("App Password", type="password", help="If using Gmail, use a 16-digit Google App Password, not your standard login password.") target_pitch_co = st.selectbox("Select Target Company to Blueprint Email:", lead_match_df["Target Company"].unique()) target_lead_info = lead_match_df[lead_match_df["Target Company"] == target_pitch_co].iloc[0] alignment_reason = target_lead_info.get('Alignment Drivers', 'strategic overlaps') destination_email = target_lead_info.get('Email Destination', '') email_subject = "Strategic Academic-Industry Research Integration Proposal - USM" email_template = f"Dear {target_lead_info.get('Contact Person') or 'Managing Director'} ({target_lead_info.get('Designation') or 'Leadership Team'}),\n\nI hope this message finds you well.\n\nWe identified {target_pitch_co} via the InvestPenang Industry Registry as a key stakeholder within the {target_lead_info.get('Industry Sector')} sector.\n\nUniversiti Sains Malaysia (USM) is currently deploying our Strategic Integration Engine. Based on an algorithmic intersection analysis, your industry vertical directly aligns with the research clusters led by {selected_usm}.\n\nSpecifically, our system flagged strong potential synergies regarding: {alignment_reason}.\n\nWe would welcome the opportunity to benchmark your current operational challenges with our expert technical systems.\n\nAre you available for a brief introductory call next week?\n\nBest Regards,\nEcosystem Secretariat\nUniversiti Sains Malaysia (USM)" st.text_area("Pre-compiled Outreach Correspondence Draft:", email_template, height=310) st.markdown(f"**To:** `{destination_email}`") if st.button("🚀 Send Pitch Email Directly", use_container_width=True): if not sender_email or not sender_pass: st.error("Please configure your Sender Email and App Password in the ⚙️ Setup expander above.") elif not destination_email or destination_email == "N/A": st.error("This company profile does not have a valid email address scraped.") else: with st.spinner("Dispatching email..."): success, msg = send_direct_email(sender_email, sender_pass, destination_email, email_subject, email_template) if success: st.success(f"✅ {msg}") st.balloons() else: st.error(f"❌ {msg}") else: st.warning("No localized industry matches found for this expert's keyword string. Ensure the taxonomy dictionary covers their domain.") # ========================================== # VIEW 4: REGISTRATION / FORM IFRAME # ========================================== elif view_mode == "📝 Submit Profile": st.title("📝 Join the Ecosystem") st.markdown("Fill out the form below to register your profile. Alternatively, you can access the direct form link [here](https://forms.gle/97jg6JupcDnMYF3VA).") col1, col2, col3 = st.columns([1, 10, 1]) with col2: components.html( """ """, height=800, ) # ========================================== # VIEW 5: ADMIN SUITE # ========================================== elif view_mode == "🔒 Admin Suite": st.title("🔒 Secured Admin Center") password = st.text_input("Committee Authentication Key", type="password") if password == "admin123": st.success("Authentication Successful: Full Access Granted") tab1, tab2 = st.tabs(["🏭 SME Master Database", "🎓 USM Experts Database"]) with tab1: st.dataframe(sme_df, use_container_width=True) st.download_button("⬇️ Download Master SME (CSV)", sme_df.to_csv(index=False), "sme_master.csv", "text/csv") with tab2: st.dataframe(usm_df, use_container_width=True) st.download_button("⬇️ Download Master USM (CSV)", usm_df.to_csv(index=False), "usm_master.csv", "text/csv") elif password: st.error("Invalid Key")