Skip to content

Commit 67a26dc

Browse files
jeremymanning and claude committed
Update validation script and tests for new structured format
- Remove the citation column requirement from validation (citations are now built from structured fields)
- Allow local file paths (data/pdfs/...) as valid URLs
- Update tests to use the new structured field format instead of citation/links_html

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 53ef646 commit 67a26dc

File tree

4 files changed

+67
-33
lines changed

4 files changed

+67
-33
lines changed

scripts/validate_data.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ def validate_publications(project_root: Path) -> List[str]:
1919
"""Validate publications.xlsx data.
2020
2121
Returns list of error messages (empty if valid).
22+
23+
Note: Citations are now built from structured fields (authors, year, title, etc.)
24+
rather than a single 'citation' column. Only 'title' is required.
2225
"""
2326
errors = []
2427
xlsx_path = project_root / 'data' / 'publications.xlsx'
@@ -33,7 +36,8 @@ def validate_publications(project_root: Path) -> List[str]:
3336
errors.append(f"Error loading {xlsx_path}: {e}")
3437
return errors
3538

36-
required_fields = ['title', 'citation']
39+
# Only title is required - citations are built from structured fields
40+
required_fields = ['title']
3741
image_dir = project_root / 'images' / 'publications'
3842

3943
for sheet_name, items in data.items():
@@ -44,9 +48,10 @@ def validate_publications(project_root: Path) -> List[str]:
4448
if not val or (isinstance(val, str) and not val.strip()):
4549
errors.append(f"publications/{sheet_name} row {i}: missing {field}")
4650

47-
# Check URL format (validate_url_format returns True if valid)
51+
# Check URL format - allow local file paths (data/pdfs/...) which get
52+
# converted to GitHub URLs by the build script
4853
url = item.get('title_url', '')
49-
if url and not validate_url_format(url):
54+
if url and not validate_url_format(url) and not url.startswith('data/'):
5055
errors.append(f"publications/{sheet_name} row {i}: invalid URL '{url}'")
5156

5257
# Check image file exists (check_file_exists returns error msg or None)

tests/test_build_people.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def test_generates_director_with_links(self):
9595
'name_url': '',
9696
'role': 'lab director',
9797
'bio': 'Bio paragraph here.',
98-
'links_html': '[<a href="#">CV</a>]'
98+
'links_html': 'CV:http://example.com/cv'
9999
}
100100

101101
html = generate_director_content(director)
@@ -105,7 +105,8 @@ def test_generates_director_with_links(self):
105105
assert 'jeremy manning' in html
106106
assert 'lab director' in html
107107
assert 'Bio paragraph here.' in html
108-
assert '[<a href="#">CV</a>]' in html
108+
assert 'CV' in html
109+
assert 'http://example.com/cv' in html
109110

110111
def test_handles_no_links(self):
111112
"""Test director without links."""

tests/test_build_publications.py

Lines changed: 53 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -90,30 +90,33 @@ def test_generates_basic_card(self):
9090
'image': 'test.png',
9191
'title': 'Test Paper',
9292
'title_url': 'http://example.com',
93-
'citation': 'Author (2024) Test Paper',
94-
'links_html': '[<a href="#">PDF</a>]'
93+
'authors': 'Author A',
94+
'year': '2024',
95+
'journal': 'Journal',
96+
'pdf_link': 'http://example.com/pdf'
9597
}
9698

97-
html = generate_publication_card(pub)
99+
html = generate_publication_card(pub, 'papers')
98100

99101
assert '<div class="publication-card">' in html
100102
assert 'images/publications/test.png' in html
101103
assert 'Test Paper' in html
102104
assert 'http://example.com' in html
103-
assert 'Author (2024) Test Paper' in html
104-
assert '[<a href="#">PDF</a>]' in html
105+
assert 'Author A' in html
106+
assert '2024' in html
105107

106108
def test_handles_no_url(self):
107109
"""Test card with no title URL."""
108110
pub = {
109111
'image': 'test.png',
110112
'title': 'Test Paper',
111113
'title_url': '',
112-
'citation': 'Citation',
113-
'links_html': ''
114+
'authors': 'Author',
115+
'year': '2024',
116+
'journal': 'Journal'
114117
}
115118

116-
html = generate_publication_card(pub)
119+
html = generate_publication_card(pub, 'papers')
117120

118121
assert '<h4>Test Paper</h4>' in html
119122
assert 'href=' not in html.split('<h4>')[1].split('</h4>')[0]
@@ -124,25 +127,30 @@ def test_handles_no_links(self):
124127
'image': 'test.png',
125128
'title': 'Test Paper',
126129
'title_url': '',
127-
'citation': 'Citation',
128-
'links_html': ''
130+
'authors': 'Author',
131+
'year': '2024',
132+
'journal': 'Journal'
129133
}
130134

131-
html = generate_publication_card(pub)
135+
html = generate_publication_card(pub, 'papers')
132136

133137
assert 'publication-links' not in html
134138

135-
def test_preserves_html_in_citation(self):
136-
"""Test that HTML in citation is preserved."""
139+
def test_generates_citation_with_journal(self):
140+
"""Test that citation includes journal in italics."""
137141
pub = {
138142
'image': 'test.png',
139143
'title': 'Test',
140144
'title_url': '',
141-
'citation': 'Author (2024) <em>Journal Name</em>, 1(1): 1-10.',
142-
'links_html': ''
145+
'authors': 'Author',
146+
'year': '2024',
147+
'journal': 'Journal Name',
148+
'volume': '1',
149+
'issue': '1',
150+
'pages': '1-10'
143151
}
144152

145-
html = generate_publication_card(pub)
153+
html = generate_publication_card(pub, 'papers')
146154

147155
assert '<em>Journal Name</em>' in html
148156

@@ -153,19 +161,19 @@ class TestGenerateSectionContent:
153161
def test_generates_multiple_cards(self):
154162
"""Test generating content with multiple publications."""
155163
pubs = [
156-
{'image': 'a.png', 'title': 'Paper A', 'title_url': '', 'citation': 'A', 'links_html': ''},
157-
{'image': 'b.png', 'title': 'Paper B', 'title_url': '', 'citation': 'B', 'links_html': ''},
164+
{'image': 'a.png', 'title': 'Paper A', 'title_url': '', 'authors': 'A', 'year': '2024', 'journal': 'J'},
165+
{'image': 'b.png', 'title': 'Paper B', 'title_url': '', 'authors': 'B', 'year': '2024', 'journal': 'J'},
158166
]
159167

160-
html = generate_section_content(pubs)
168+
html = generate_section_content(pubs, 'papers')
161169

162170
assert html.count('publication-card') == 2
163171
assert 'Paper A' in html
164172
assert 'Paper B' in html
165173

166174
def test_handles_empty_list(self):
167175
"""Test generating content with no publications."""
168-
html = generate_section_content([])
176+
html = generate_section_content([], 'papers')
169177

170178
assert html == ''
171179

@@ -188,11 +196,20 @@ def test_builds_complete_page(self, temp_dir):
188196

189197
for sheet_name in ['papers', 'chapters', 'dissertations', 'talks', 'courses', 'posters']:
190198
ws = wb.create_sheet(title=sheet_name)
191-
ws.append(['image', 'title', 'title_url', 'citation', 'links_html'])
192199
if sheet_name == 'papers':
200+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'journal', 'volume', 'pages'])
193201
ws.append(['paper.png', 'Test Paper', 'http://example.com',
194-
'Author (2024) Test Paper. <em>Journal</em>.',
195-
'[<a href="#">PDF</a>]'])
202+
'Author A', '2024', 'Journal Name', '1', '1-10'])
203+
elif sheet_name == 'chapters':
204+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'editors', 'book_title', 'publisher'])
205+
elif sheet_name == 'dissertations':
206+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'degree_type', 'institution', 'location'])
207+
elif sheet_name == 'talks':
208+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'venue_name'])
209+
elif sheet_name == 'courses':
210+
ws.append(['image', 'title', 'title_url', 'description'])
211+
elif sheet_name == 'posters':
212+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'conference', 'location'])
196213

197214
wb.save(xlsx_path)
198215

@@ -218,7 +235,7 @@ def test_builds_complete_page(self, temp_dir):
218235
result = output_path.read_text()
219236
assert 'Test Paper' in result
220237
assert 'paper.png' in result
221-
assert '<em>Journal</em>' in result
238+
assert '<em>Journal Name</em>' in result
222239

223240
def test_handles_special_characters(self, temp_dir):
224241
"""Test that special characters are preserved."""
@@ -228,10 +245,20 @@ def test_handles_special_characters(self, temp_dir):
228245

229246
for sheet_name in ['papers', 'chapters', 'dissertations', 'talks', 'courses', 'posters']:
230247
ws = wb.create_sheet(title=sheet_name)
231-
ws.append(['image', 'title', 'title_url', 'citation', 'links_html'])
232248
if sheet_name == 'papers':
249+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'journal'])
233250
ws.append(['test.png', "Paper & O'Brien's \"quoted\"", '',
234-
"Author & O'Brien (2024)", ''])
251+
"Author & O'Brien", '2024', 'Journal'])
252+
elif sheet_name == 'chapters':
253+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'editors', 'book_title', 'publisher'])
254+
elif sheet_name == 'dissertations':
255+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'degree_type', 'institution', 'location'])
256+
elif sheet_name == 'talks':
257+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'venue_name'])
258+
elif sheet_name == 'courses':
259+
ws.append(['image', 'title', 'title_url', 'description'])
260+
elif sheet_name == 'posters':
261+
ws.append(['image', 'title', 'title_url', 'authors', 'year', 'conference', 'location'])
235262

236263
wb.save(xlsx_path)
237264

tests/test_build_software.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def test_generates_complete_item(self):
8585
item = {
8686
'name': 'MyTool',
8787
'description': 'Does something useful.',
88-
'links_html': '[<a href="#">GitHub</a>]'
88+
'github_link': 'https://github.com/example/mytool'
8989
}
9090

9191
html = generate_software_item(item)
@@ -94,7 +94,8 @@ def test_generates_complete_item(self):
9494
assert '</p>' in html
9595
assert '<strong>MyTool.</strong>' in html
9696
assert 'Does something useful.' in html
97-
assert '[<a href="#">GitHub</a>]' in html
97+
assert 'GitHub' in html
98+
assert 'https://github.com/example/mytool' in html
9899

99100
def test_handles_missing_links(self):
100101
"""Test item without links."""

0 commit comments

Comments
 (0)