Vedant Jigarbhai Mehta committed on
Commit
aaf9ca5
·
1 Parent(s): aed5d76

fix search count, cluster data, and network filter

Browse files
backend/routes/clusters.py CHANGED
@@ -93,7 +93,7 @@ def get_clusters():
93
  # Get texts for labeling
94
  texts = [r[0] for r in conn.execute("SELECT combined_text FROM posts ORDER BY rowid").fetchall()]
95
 
96
- # Generate labels
97
  clusters = {}
98
  for i in range(k):
99
  cluster_texts = [t for t, l in zip(texts, labels) if l == i]
@@ -111,26 +111,47 @@ def get_clusters():
111
  label = f"Cluster {i}"
112
 
113
  cluster_post_ids = [post_ids[j] for j in range(len(labels)) if labels[j] == i]
114
- pids_sample = cluster_post_ids[:10]
115
- placeholders = ','.join(['?' for _ in pids_sample])
116
- top = conn.execute(f"""
117
- SELECT id, title, subreddit, score FROM posts
118
- WHERE id IN ({placeholders})
119
- ORDER BY score DESC LIMIT 5
120
- """, pids_sample).fetchall()
121
-
122
- clusters[i] = {
123
  'id': i,
124
  'label': label,
125
  'size': len(cluster_post_ids),
126
- 'top_posts': [{'id': t[0], 'title': t[1], 'subreddit': t[2], 'score': t[3]} for t in top]
 
127
  }
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  conn.close()
130
 
 
 
 
 
131
  result = {
132
- 'clusters': list(clusters.values()),
133
  'k': k,
 
134
  }
135
  if was_clamped:
136
  result['warning'] = f'Requested k={original_k} was clamped to {k} (valid range: {MIN_K}-{MAX_K})'
 
93
  # Get texts for labeling
94
  texts = [r[0] for r in conn.execute("SELECT combined_text FROM posts ORDER BY rowid").fetchall()]
95
 
96
+ # Generate labels and gather full per-cluster data
97
  clusters = {}
98
  for i in range(k):
99
  cluster_texts = [t for t, l in zip(texts, labels) if l == i]
 
111
  label = f"Cluster {i}"
112
 
113
  cluster_post_ids = [post_ids[j] for j in range(len(labels)) if labels[j] == i]
114
+
115
+ cluster_data = {
 
 
 
 
 
 
 
116
  'id': i,
117
  'label': label,
118
  'size': len(cluster_post_ids),
119
+ 'top_posts': [],
120
+ 'subreddits': [],
121
  }
122
 
123
+ # Top 10 posts by score and subreddit breakdown
124
+ if cluster_post_ids:
125
+ placeholders = ','.join(['?' for _ in cluster_post_ids])
126
+
127
+ top = conn.execute(f"""
128
+ SELECT id, title, subreddit, score, author, permalink, created_date FROM posts
129
+ WHERE id IN ({placeholders})
130
+ ORDER BY score DESC LIMIT 10
131
+ """, cluster_post_ids).fetchall()
132
+ cluster_data['top_posts'] = [
133
+ {'id': t[0], 'title': t[1], 'subreddit': t[2], 'score': t[3],
134
+ 'author': t[4], 'permalink': t[5], 'date': t[6]} for t in top
135
+ ]
136
+
137
+ sub_counts = conn.execute(f"""
138
+ SELECT subreddit, COUNT(*) as count FROM posts
139
+ WHERE id IN ({placeholders}) GROUP BY subreddit ORDER BY count DESC
140
+ """, cluster_post_ids).fetchall()
141
+ cluster_data['subreddits'] = [{'name': s[0], 'count': s[1]} for s in sub_counts]
142
+
143
+ clusters[i] = cluster_data
144
+
145
  conn.close()
146
 
147
+ cluster_list = list(clusters.values())
148
+ from services.llm_service import generate_cluster_summary
149
+ summary = generate_cluster_summary(cluster_list, k)
150
+
151
  result = {
152
+ 'clusters': cluster_list,
153
  'k': k,
154
+ 'summary': summary,
155
  }
156
  if was_clamped:
157
  result['warning'] = f'Requested k={original_k} was clamped to {k} (valid range: {MIN_K}-{MAX_K})'
backend/routes/network.py CHANGED
@@ -42,16 +42,20 @@ def get_graph():
42
  nodes_to_keep = [n for n in G.nodes() if G.degree(n) >= min_degree]
43
  subgraph = G.subgraph(nodes_to_keep).copy()
44
 
 
 
45
  edge_key = 'links'
 
 
 
 
 
 
46
  result = {
47
  'nodes': [{'id': n, **subgraph.nodes[n]} for n in subgraph.nodes()],
48
  edge_key: [{'source': u, 'target': v, **d} for u, v, d in subgraph.edges(data=True)],
49
- 'stats': {
50
- 'num_nodes': subgraph.number_of_nodes(),
51
- 'num_edges': subgraph.number_of_edges(),
52
- 'num_components': nx.number_connected_components(subgraph),
53
- 'density': round(nx.density(subgraph), 6) if subgraph.number_of_nodes() > 1 else 0
54
- }
55
  }
56
 
57
  return jsonify(result)
@@ -59,26 +63,35 @@ def get_graph():
59
 
60
  @network_bp.route('/remove-node/<author>')
61
  def remove_node(author):
 
62
  graph_data = current_app.config['graph_data']
63
- G = graph_from_data(graph_data)
64
 
65
- if author not in G:
66
  return jsonify({
67
  'error': True,
68
  'message': f'Author "{author}" not found in the network.'
69
  }), 404
70
 
71
- # Stats before removal
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  components_before = nx.number_connected_components(G)
73
  nodes_before = G.number_of_nodes()
74
  edges_before = G.number_of_edges()
75
 
76
- # Find which component the author belongs to
77
- for comp in nx.connected_components(G):
78
- if author in comp:
79
- original_component_size = len(comp)
80
- break
81
-
82
  # Remove the node
83
  removed_degree = G.degree(author)
84
  removed_pagerank = G.nodes[author].get('pagerank', 0)
@@ -105,7 +118,7 @@ def remove_node(author):
105
  f"Components: {components_after}. {edges_before - edges_after} edges removed."
106
  )
107
 
108
- # Return updated graph
109
  edge_key = 'links'
110
  result = {
111
  'nodes': [{'id': n, **G.nodes[n]} for n in G.nodes()],
 
42
  nodes_to_keep = [n for n in G.nodes() if G.degree(n) >= min_degree]
43
  subgraph = G.subgraph(nodes_to_keep).copy()
44
 
45
+ from services.llm_service import generate_network_summary
46
+
47
  edge_key = 'links'
48
+ filtered_stats = {
49
+ 'num_nodes': subgraph.number_of_nodes(),
50
+ 'num_edges': subgraph.number_of_edges(),
51
+ 'num_components': nx.number_connected_components(subgraph),
52
+ 'density': round(nx.density(subgraph), 6) if subgraph.number_of_nodes() > 1 else 0
53
+ }
54
  result = {
55
  'nodes': [{'id': n, **subgraph.nodes[n]} for n in subgraph.nodes()],
56
  edge_key: [{'source': u, 'target': v, **d} for u, v, d in subgraph.edges(data=True)],
57
+ 'stats': filtered_stats,
58
+ 'summary': generate_network_summary(filtered_stats),
 
 
 
 
59
  }
60
 
61
  return jsonify(result)
 
63
 
64
  @network_bp.route('/remove-node/<author>')
65
  def remove_node(author):
66
+ min_degree = request.args.get('min_degree', 1, type=int)
67
  graph_data = current_app.config['graph_data']
68
+ G_full = graph_from_data(graph_data)
69
 
70
+ if author not in G_full:
71
  return jsonify({
72
  'error': True,
73
  'message': f'Author "{author}" not found in the network.'
74
  }), 404
75
 
76
+ # Apply the same min_degree filter the graph view is using
77
+ if min_degree > 1:
78
+ nodes_to_keep = [n for n in G_full.nodes() if G_full.degree(n) >= min_degree]
79
+ G = G_full.subgraph(nodes_to_keep).copy()
80
+ else:
81
+ G = G_full.copy()
82
+
83
+ # If the author was filtered out by min_degree, they're not in the visible graph
84
+ if author not in G:
85
+ return jsonify({
86
+ 'error': True,
87
+ 'message': f'Author "{author}" is not visible at min degree {min_degree}.'
88
+ }), 404
89
+
90
+ # Stats before removal (within the filtered graph)
91
  components_before = nx.number_connected_components(G)
92
  nodes_before = G.number_of_nodes()
93
  edges_before = G.number_of_edges()
94
 
 
 
 
 
 
 
95
  # Remove the node
96
  removed_degree = G.degree(author)
97
  removed_pagerank = G.nodes[author].get('pagerank', 0)
 
118
  f"Components: {components_after}. {edges_before - edges_after} edges removed."
119
  )
120
 
121
+ # Return updated graph (respecting min_degree filter)
122
  edge_key = 'links'
123
  result = {
124
  'nodes': [{'id': n, **G.nodes[n]} for n in G.nodes()],
backend/routes/search.py CHANGED
@@ -23,7 +23,7 @@ def detect_language(text):
23
  def search():
24
  data = request.get_json() or {}
25
  query = data.get('message', '').strip()
26
- limit = data.get('limit', 20)
27
 
28
  # Edge case: conversational/greeting queries
29
  greetings = [
 
23
  def search():
24
  data = request.get_json() or {}
25
  query = data.get('message', '').strip()
26
+ limit = data.get('limit', 10)
27
 
28
  # Edge case: conversational/greeting queries
29
  greetings = [
frontend/src/pages/Clusters.jsx CHANGED
@@ -20,6 +20,9 @@ const SUBREDDIT_COLORS = {
20
  neoliberal: '#6366f1', worldpolitics: '#14b8a6', Conservative: '#f97316', Republican: '#ea580c'
21
  }
22
 
 
 
 
23
  export default function Clusters() {
24
  const [k, setK] = useState(8)
25
  const [debouncedK, setDebouncedK] = useState(8)
@@ -71,11 +74,20 @@ export default function Clusters() {
71
  onChange={e => setK(Number(e.target.value))}
72
  className="flex-1 max-w-xs" />
73
  <span className="text-2xl font-bold text-indigo-600 w-12 text-center">{k}</span>
 
 
 
 
 
 
 
 
 
74
  </div>
75
  {warning && <p className="text-sm text-amber-600 mt-2">{warning}</p>}
76
  <div className="flex items-center justify-between mt-3">
77
  <p className="text-xs text-gray-400">
78
- {clusters.length} clusters · {totalPosts.toLocaleString()} posts · KMeans on 384-dim embeddings
79
  </p>
80
  <Link to="/dashboard/embeddings" className="text-xs text-indigo-600 hover:text-indigo-800 font-medium">
81
  Explore full embedding map →
 
20
  neoliberal: '#6366f1', worldpolitics: '#14b8a6', Conservative: '#f97316', Republican: '#ea580c'
21
  }
22
 
23
+ // Pre-computed k values load instantly from SQLite; other k values are computed on the fly
24
+ const PRECOMPUTED_K = new Set([3, 5, 8, 10, 15, 20, 30, 50])
25
+
26
  export default function Clusters() {
27
  const [k, setK] = useState(8)
28
  const [debouncedK, setDebouncedK] = useState(8)
 
74
  onChange={e => setK(Number(e.target.value))}
75
  className="flex-1 max-w-xs" />
76
  <span className="text-2xl font-bold text-indigo-600 w-12 text-center">{k}</span>
77
+ {PRECOMPUTED_K.has(k) ? (
78
+ <span className="text-[10px] font-medium text-emerald-700 bg-emerald-50 border border-emerald-200 px-2 py-0.5 rounded-full">
79
+ Instant · pre-computed
80
+ </span>
81
+ ) : (
82
+ <span className="text-[10px] font-medium text-amber-700 bg-amber-50 border border-amber-200 px-2 py-0.5 rounded-full">
83
+ Computed on-the-fly
84
+ </span>
85
+ )}
86
  </div>
87
  {warning && <p className="text-sm text-amber-600 mt-2">{warning}</p>}
88
  <div className="flex items-center justify-between mt-3">
89
  <p className="text-xs text-gray-400">
90
+ {clusters.length} clusters · {totalPosts.toLocaleString()} posts · KMeans on 384-dim embeddings · Instant for k ∈ {'{'}3, 5, 8, 10, 15, 20, 30, 50{'}'}
91
  </p>
92
  <Link to="/dashboard/embeddings" className="text-xs text-indigo-600 hover:text-indigo-800 font-medium">
93
  Explore full embedding map →
frontend/src/pages/Network.jsx CHANGED
@@ -13,7 +13,7 @@ const COMMUNITY_COLORS = [
13
  export default function Network() {
14
  const [graphData, setGraphData] = useState(null)
15
  const [stats, setStats] = useState(null)
16
- const [minDegree, setMinDegree] = useState(2)
17
  const [selectedNode, setSelectedNode] = useState(null)
18
  const [removalImpact, setRemovalImpact] = useState(null)
19
  const [loading, setLoading] = useState(true)
@@ -95,7 +95,7 @@ export default function Network() {
95
  if (!selectedNode) return
96
  setRemoving(true)
97
  try {
98
- const res = await removeNetworkNode(selectedNode.id)
99
  setRemovalImpact(res.data)
100
  } catch (err) {
101
  console.error(err)
 
13
  export default function Network() {
14
  const [graphData, setGraphData] = useState(null)
15
  const [stats, setStats] = useState(null)
16
+ const [minDegree, setMinDegree] = useState(1)
17
  const [selectedNode, setSelectedNode] = useState(null)
18
  const [removalImpact, setRemovalImpact] = useState(null)
19
  const [loading, setLoading] = useState(true)
 
95
  if (!selectedNode) return
96
  setRemoving(true)
97
  try {
98
+ const res = await removeNetworkNode(selectedNode.id, { min_degree: minDegree })
99
  setRemovalImpact(res.data)
100
  } catch (err) {
101
  console.error(err)
frontend/src/services/api.js CHANGED
@@ -32,8 +32,8 @@ export const searchTimeSeries = (data) =>
32
  export const getNetworkGraph = (params) =>
33
  api.get('/network/graph', { params })
34
 
35
- export const removeNetworkNode = (author) =>
36
- api.get(`/network/remove-node/${encodeURIComponent(author)}`)
37
 
38
  // Clusters
39
  export const getClusters = (params) =>
 
32
  export const getNetworkGraph = (params) =>
33
  api.get('/network/graph', { params })
34
 
35
+ export const removeNetworkNode = (author, params = {}) =>
36
+ api.get(`/network/remove-node/${encodeURIComponent(author)}`, { params })
37
 
38
  // Clusters
39
  export const getClusters = (params) =>