Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
eaf99a3
(after rebase) updating dependencies
HugoOnghai Jun 20, 2025
6b96d33
Trying to get test_doi_builder to work
HugoOnghai Jun 21, 2025
46a688d
Revert "Trying to get test_doi_builder to work"
HugoOnghai Jun 23, 2025
588c54e
(after rebase) preliminary version of ELinkGetResponseModel made, usi…
HugoOnghai Jun 25, 2025
06a3a7d
(after rebase) queried all desired data entries (stored as batched js…
HugoOnghai Jun 27, 2025
4d1f471
(after rebase) almost done with pipeline, need to incorporate pymongo…
HugoOnghai Jul 3, 2025
a8172c9
(after rebase) template for core
tsmathis Jul 3, 2025
1286279
(after rebase) push latest doi_builder and other thigns
HugoOnghai Jul 3, 2025
5466b4f
find_out_of_date func
tsmathis Jul 3, 2025
f4325ba
remaining core functions
HugoOnghai Jul 8, 2025
ce7651d
testing core functionalities on review environment and local mongo da…
HugoOnghai Jul 10, 2025
4905bcb
(after rebase) Merged upstream (#1)
HugoOnghai Jul 12, 2025
c10e9b7
(after rebase) updated gitignore and moved linting to new branch
HugoOnghai Jul 16, 2025
9e89ad5
Remove accidentally committed files
HugoOnghai Jul 30, 2025
f5a1cf4
Cleaning up new-core PR, restore workflows and legacy stuff
HugoOnghai Aug 4, 2025
503696a
Clean up legacy for PR
HugoOnghai Aug 4, 2025
a66640b
Clean Up branch for PR (after rebasing after lint + release PRs were …
HugoOnghai Aug 5, 2025
1cef222
Cleaned up .gitignore
HugoOnghai Aug 5, 2025
a4ab306
Addresses comments raised in PR for core.py
HugoOnghai Aug 5, 2025
ed89067
Removed extraneous files from PR
HugoOnghai Aug 6, 2025
cf449f5
Removed models.py and cleaned up doi_builder.py
HugoOnghai Aug 6, 2025
1218576
Removed extraneous files for debugging from tests/ -- will let the te…
HugoOnghai Aug 6, 2025
8191ffc
improved DOIModel to accept RecordResponse splat, and cleaned up comm…
HugoOnghai Aug 7, 2025
99d1ca9
Addressed more comments, removed oldreqs, renamed doi_builder, and mo…
HugoOnghai Aug 7, 2025
3ef1405
move docstring to beginning of function
tsmathis Aug 7, 2025
5f21e46
remove empty docstring
tsmathis Aug 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 162 additions & 0 deletions src/mp_cite/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from typing import TypeAlias

from elinkapi import Elink
from elinkapi.record import RecordResponse
from pymongo import MongoClient

import requests
from elinkapi.utils import Validation


from mp_cite.doi_builder import MinimumDARecord

from typing import Literal

# Alias for OSTI record identifiers (plain integers); used to annotate the
# `osti_id` parameters and the ID lists returned by the functions in this module.
OstiID: TypeAlias = int


def find_out_of_date_doi_entries(
    rc_client: MongoClient,
    doi_client: MongoClient,
    robocrys_db: str,
    robocrys_collection: str,
    doi_db: str,
    doi_collection: str,
) -> list[OstiID]:
    """
    find_out_of_date_doi_entries queries MP's mongo collections to find all doi documents whose
    robocrys document was updated more recently than the most recently updated doi document.

    :rc_client is the MongoClient used to access the robocrys collection
    :doi_client is the MongoClient used to access the doi collection (since a new doi collection is planned,
        the clients are passed separately, though in the future they may be the same client.)
    :robocrys_db is the name of the database the robocrys collection is in
    :robocrys_collection is the name of the robocrys collection
    :doi_db is the name of the database the doi collection is in
    :doi_collection is the name of the doi collection

    returns a list containing all OSTI IDs associated with out-of-date doi entries. The list is empty
    when the doi collection holds no document with a `date_metadata_updated` value, or when no
    robocrys document is newer than that timestamp.
    """
    robocrys = rc_client[robocrys_db][robocrys_collection]
    dois = doi_client[doi_db][doi_collection]

    # Fetch only the newest `date_metadata_updated`; `None` stands in for an
    # empty collection instead of letting `next()` raise StopIteration.
    newest_doi_doc = next(
        dois.aggregate(
            [
                {"$project": {"_id": 0, "date_metadata_updated": 1}},
                {"$sort": {"date_metadata_updated": -1}},
                {"$limit": 1},
            ]
        ),
        None,
    )
    latest_doi = (
        None if newest_doi_doc is None else newest_doi_doc.get("date_metadata_updated")
    )
    if latest_doi is None:
        # Nothing to compare against, hence nothing can be out of date.
        return []

    material_ids_to_update = [
        doc["material_id"]
        for doc in robocrys.find(
            {"last_updated": {"$gt": latest_doi}}, {"_id": 0, "material_id": 1}
        )
    ]
    if not material_ids_to_update:
        # An empty `$in` list matches nothing, so skip the second query.
        return []

    return [
        doc["osti_id"]
        for doc in dois.find(
            {"material_id": {"$in": material_ids_to_update}},
            {"_id": 0, "osti_id": 1},
        )
    ]


def update_existing_osti_record(
    elinkapi: Elink, osti_id: OstiID, new_values: dict
) -> RecordResponse:
    """
    update_existing_osti_record overwrites selected fields of an existing E-Link record and saves the result.

    :elinkapi is the Elink client bound to the environment that holds the record (e.g. production or review)
    :osti_id is the OSTI ID of the record to update
    :new_values maps keywords (which should exist in ELink's record model) to the values that replace
        the record's current values under those keywords

    returns the record response produced by saving the modified record. The record is only saved here;
    use update_state_of_osti_record to submit it.

    N.B., the caller is expected to handle the "sponsor identifier bug" before calling this function:
    record responses retrieved for validated records from the E-Link production environment seemingly
    lack the Sponsor Organization identifiers that were required for their submission (due to
    rearrangement of metadata on E-Link's side). A retroactive fix would look like

        for entry in record.organizations:
            if entry.type == "SPONSOR":
                entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}]
                break

    but it is deliberately left to the user.
    """
    current_record = elinkapi.get_single_record(osti_id)

    # Apply each replacement directly onto the fetched record object.
    for field_name, replacement in new_values.items():
        setattr(current_record, field_name, replacement)

    saved_response = elinkapi.update_record(osti_id, current_record, state="save")
    return saved_response


def submit_new_osti_record(
    elinkapi: Elink,
    new_values: dict,
    state="submit",
) -> RecordResponse:
    """
    submit_new_osti_record builds a new record from the MinimumDARecord defaults plus the provided
    keyword-value pairs and posts it to E-Link.

    :elinkapi is the Elink client for the target environment
    :new_values is the dictionary of keywords and values to include in the record (in addition to,
        or in lieu of, the default values). The title MUST be provided.
    :state defaults to "submit"; pass "save" to only save the record. Submitting is the default on the
        assumption that a new record is only sent to OSTI once it is ready for submission.

    returns the record response after posting
    """
    # MinimumDARecord supplies defaults for every necessary field except the title,
    # so the caller's values only need to cover the title and any overrides.
    return elinkapi.post_new_record(MinimumDARecord(**new_values), state)


def update_state_of_osti_record(
    elinkapi: Elink, osti_id: OstiID, new_state: Literal["save", "submit"]
) -> RecordResponse:
    """
    update_state_of_osti_record re-sends an existing record to E-Link under a new state.

    :elinkapi is the Elink client for the target environment
    :osti_id is the OSTI ID of the record whose state should change
    :new_state is the state to apply, either "save" or "submit" (the two valid states)

    returns the record response after updating the state.
    """
    # The record is fetched and passed back unchanged; only the state differs.
    current_record = elinkapi.get_single_record(osti_id)
    response = elinkapi.update_record(osti_id, current_record, new_state)
    return response


def delete_osti_record(elinkapi: Elink, osti_id: OstiID, reason: str) -> bool:
    """
    Delete a record by its OSTI ID.

    :elinkapi is the Elink client; its `target` (base URL) and `token` are used to build the request
    :osti_id is the osti_id associated with the record which ought to be deleted
    :reason is a str object which explains in words why the record is to be deleted (necessary for the http request)

    returns True if the response status code is 204, else False. The response is first passed to
    `Validation.handle_response` (presumably raising for error responses; confirm against elinkapi).
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # Percent-encode the free-text reason so characters such as "&", "#", "%"
    # or "+" cannot truncate or corrupt the query string.
    encoded_reason = quote(reason, safe="")
    response = requests.delete(
        f"{elinkapi.target}records/{osti_id}?reason={encoded_reason}",
        headers={"Authorization": f"Bearer {elinkapi.token}"},
    )
    Validation.handle_response(response)
    return response.status_code == 204  # True if deleted successfully
84 changes: 84 additions & 0 deletions src/mp_cite/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from pydantic import BaseModel, Field, model_validator

from datetime import datetime
from elinkapi import Record, Organization, Person

from typing import List, Any
import pytz


class DOIModel(BaseModel):
    """
    The model for a DOI document in a mongodb collection, which should better align with E-Link's record model.

    It is designed for easy transfer from E-Link record response to doi document. All fields can be mapped directly
    from the record response keywords of the same name, except material_id, which is filled in automatically from
    site_unique_id by the model validator `set_material_id(...)`.
    """

    # identifiers
    doi: str = Field(description="The DOI number as allocated by OSTI")
    title: str = Field(description="The title of the record")
    # `coerce_numbers_to_str` is a str-field option, so it is not set on this int field.
    osti_id: int = Field(
        description="The OSTI ID number allocated by OSTI to make the DOI number",
    )
    material_id: str
    site_unique_id: str

    # time stamps
    date_metadata_added: datetime | None = Field(
        description="date_record_entered_onto_ELink"
    )
    date_metadata_updated: datetime | None = Field(
        description="date_record_last_updated_on_Elink"
    )

    # status
    workflow_status: str
    date_released: datetime | None = Field(description="")
    date_submitted_to_osti_first: datetime = Field(
        description="date record was first submitted to OSTI for publication, maintained internally by E-Link"
    )
    date_submitted_to_osti_last: datetime = Field(
        description="most recent date record information was submitted to OSTI. Maintained internally by E-Link"
    )
    publication_date: datetime | None = Field(description="")

    @model_validator(mode="before")
    @classmethod
    def set_material_id(cls, values: Any) -> Any:
        """
        set_material_id takes the raw values passed into the model constructor, before pydantic parsing,
        and makes material_id match whatever was passed in for site_unique_id.

        :cls is the model class (this is a class method)
        :values are the values passed into the constructor (the "raw input")

        returns the values so that instantiation can proceed. The caller's dict is not modified; a copy
        carrying material_id is returned instead. If the input is not a dict or lacks site_unique_id it
        is returned untouched, so that pydantic reports the missing field as a validation error.
        """
        if isinstance(values, dict) and "site_unique_id" in values:
            values = {**values, "material_id": values["site_unique_id"]}
        return values


def _default_organizations() -> list[Organization]:
    # Fresh list per record: the researching organization plus a sponsor entry.
    researching = Organization(
        type="RESEARCHING", name="LBNL Materials Project (LBNL-MP)"
    )
    # sponsor org is necessary for submission
    sponsor = Organization(
        type="SPONSOR",
        name="TEST SPONSOR ORG",
        identifiers=[{"type": "CN_DOE", "value": "AC02-05CH11231"}],
    )
    return [researching, sponsor]


def _default_persons() -> list[Person]:
    # Fresh list per record holding the single default author.
    return [Person(type="AUTHOR", last_name="Persson")]


def _default_access_limitations() -> list[str]:
    # Fresh list per record so instances never share a mutable default.
    return ["UNL"]


def _utc_now() -> datetime:
    # Timezone-aware "now" in UTC, evaluated at record creation time.
    return datetime.now(tz=pytz.UTC)


class MinimumDARecord(Record):
    """
    A Record subclass that provides default values for the fields set below,
    so that only `title` has to be supplied by the caller.
    """

    product_type: str = "DA"
    title: str  # Required
    organizations: list[Organization] = Field(default_factory=_default_organizations)
    persons: list[Person] = Field(default_factory=_default_persons)
    site_ownership_code: str = "LBNL-MP"
    access_limitations: list[str] = Field(default_factory=_default_access_limitations)
    publication_date: datetime = Field(default_factory=_utc_now)
    site_url: str = "https://next-gen.materialsproject.org/materials"
Loading