Skip to content

figurl_curation.py

FigURLCurationSelection

Bases: SpyglassMixin, Manual

Source code in src/spyglass/spikesorting/v1/figurl_curation.py
@schema
class FigURLCurationSelection(SpyglassMixin, dj.Manual):
    """Manual table of curation/figURL parameters, one row per FigURL request.

    Rows should be added with `insert_selection`; `generate_curation_uri`
    builds the initial curation URI from a `CurationV1` row.
    """

    definition = """
    # Use `insert_selection` method to insert a row. Use `generate_curation_uri` method to generate a curation uri.
    figurl_curation_id: uuid
    ---
    -> CurationV1
    curation_uri: varchar(1000)     # GitHub-based URI to a file to which the manual curation will be saved
    metrics_figurl: blob            # metrics to display in the figURL
    """

    @classmethod
    def insert_selection(cls, key: dict):
        """Insert a row into FigURLCurationSelection.

        Parameters
        ----------
        key : dict
            Primary key of `CurationV1`, optionally with `curation_uri`,
            `metrics_figurl` and `figurl_curation_id`. The dict is updated
            in place with any generated values.
            - If `curation_uri` is not provided, it is generated with the
              `generate_curation_uri` method.
            - If `metrics_figurl` is not provided, it is set to [].

        Returns
        -------
        key : dict or list of dict
            The inserted row, including the newly generated
            `figurl_curation_id`. If `figurl_curation_id` was supplied and a
            row with that id already exists, nothing is inserted and the
            matching row(s) are returned as a list of dicts instead.
        """
        if "curation_uri" not in key:
            key["curation_uri"] = cls.generate_curation_uri(key)
        if "metrics_figurl" not in key:
            key["metrics_figurl"] = []
        if "figurl_curation_id" in key:
            query = cls & {"figurl_curation_id": key["figurl_curation_id"]}
            if query:
                # `Logger.warn` is a deprecated alias; use `warning`.
                logger.warning("Similar row(s) already inserted.")
                return query.fetch(as_dict=True)
        # A new row always receives a freshly generated id; a supplied id
        # that matched no existing row is replaced here.
        key["figurl_curation_id"] = uuid.uuid4()
        cls.insert1(key, skip_duplicates=True)
        return key

    @staticmethod
    def generate_curation_uri(key: Dict) -> str:
        """Generates a kachery-cloud URI from a row in CurationV1 table

        Reads the `curation_label` and `merge_groups` columns of the units
        table stored in the curation's analysis NWB file and stores them as
        JSON under the `labelsByUnit` and `mergeGroups` keys.

        Parameters
        ----------
        key : dict
            primary key from CurationV1

        Returns
        -------
        str
            The value returned by `kcl.store_json` for the stored curation.
        """
        curation_key = (CurationV1 & key).fetch1()
        analysis_file_abs_path = AnalysisNwbfile.get_abs_path(
            curation_key["analysis_file_name"]
        )
        # Everything needed is copied into plain lists before the file closes.
        with pynwb.NWBHDF5IO(
            analysis_file_abs_path, "r", load_namespaces=True
        ) as io:
            nwbfile = io.read()
            nwb_sorting = nwbfile.objects[
                curation_key["object_id"]
            ].to_dataframe()
            unit_ids = list(nwb_sorting.index)
            labels = list(nwb_sorting["curation_label"])
            merge_groups = list(nwb_sorting["merge_groups"])

        # Unit ids are used as JSON object keys, so they must be strings.
        unit_ids = [str(unit_id) for unit_id in unit_ids]

        labels_dict = (
            {unit_id: list(label) for unit_id, label in zip(unit_ids, labels)}
            if labels
            else {}
        )

        merge_groups_list = (
            [
                [str(unit_id) for unit_id in merge_group]
                for merge_group in _merge_dict_to_list(
                    dict(zip(unit_ids, merge_groups))
                )
            ]
            if merge_groups
            else []
        )

        return kcl.store_json(
            {
                "labelsByUnit": labels_dict,
                "mergeGroups": merge_groups_list,
            }
        )

insert_selection(key) classmethod

Insert a row into FigURLCurationSelection.

Parameters:

Name Type Description Default
key dict

Primary key of CurationV1, optionally together with curation_uri and metrics_figurl. If curation_uri is not provided, it is generated by the generate_curation_uri method. If metrics_figurl is not provided, it is set to [].

required

Returns:

Name Type Description
key dict

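The inserted row of the FigURLCurationSelection table, including the newly generated figurl_curation_id. If a row with the supplied figurl_curation_id already exists, the matching rows are returned as a list of dicts instead.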

Source code in src/spyglass/spikesorting/v1/figurl_curation.py
@classmethod
def insert_selection(cls, key: dict):
    """Insert a row into FigURLCurationSelection.

    Parameters
    ----------
    key : dict
        Primary key of `CurationV1`, optionally with `curation_uri`,
        `metrics_figurl` and `figurl_curation_id`. The dict is updated in
        place with any generated values.
        - If `curation_uri` is not provided, it is generated with the
          `generate_curation_uri` method.
        - If `metrics_figurl` is not provided, it is set to [].

    Returns
    -------
    key : dict or list of dict
        The inserted row, including the newly generated
        `figurl_curation_id`. If `figurl_curation_id` was supplied and a row
        with that id already exists, nothing is inserted and the matching
        row(s) are returned as a list of dicts instead.
    """
    if "curation_uri" not in key:
        key["curation_uri"] = cls.generate_curation_uri(key)
    if "metrics_figurl" not in key:
        key["metrics_figurl"] = []
    if "figurl_curation_id" in key:
        query = cls & {"figurl_curation_id": key["figurl_curation_id"]}
        if query:
            # `Logger.warn` is a deprecated alias; use `warning`.
            logger.warning("Similar row(s) already inserted.")
            return query.fetch(as_dict=True)
    # A new row always receives a freshly generated id; a supplied id that
    # matched no existing row is replaced here.
    key["figurl_curation_id"] = uuid.uuid4()
    cls.insert1(key, skip_duplicates=True)
    return key

generate_curation_uri(key) staticmethod

Generates a kachery-cloud URI from a row in CurationV1 table

Parameters:

Name Type Description Default
key dict

primary key from CurationV1

required
Source code in src/spyglass/spikesorting/v1/figurl_curation.py
@staticmethod
def generate_curation_uri(key: Dict) -> str:
    """Build the initial curation JSON for a CurationV1 row and store it.

    The `curation_label` and `merge_groups` columns of the units table in
    the curation's analysis NWB file are written to JSON under the
    `labelsByUnit` and `mergeGroups` keys via `kcl.store_json`.

    Parameters
    ----------
    key : dict
        primary key from CurationV1

    Returns
    -------
    str
        The value returned by `kcl.store_json` for the stored curation.
    """
    curation_row = (CurationV1 & key).fetch1()
    nwb_path = AnalysisNwbfile.get_abs_path(curation_row["analysis_file_name"])

    # Copy what is needed into plain lists while the file is still open.
    with pynwb.NWBHDF5IO(nwb_path, "r", load_namespaces=True) as io:
        units_df = io.read().objects[curation_row["object_id"]].to_dataframe()
        # Unit ids become JSON object keys, hence the string conversion.
        unit_ids = [str(unit_id) for unit_id in units_df.index]
        labels = list(units_df["curation_label"])
        merge_groups = list(units_df["merge_groups"])

    labels_by_unit = {}
    if labels:
        for unit_id, label in zip(unit_ids, labels):
            labels_by_unit[unit_id] = list(label)

    merge_group_lists = []
    if merge_groups:
        groups = _merge_dict_to_list(dict(zip(unit_ids, merge_groups)))
        merge_group_lists = [
            [str(member) for member in group] for group in groups
        ]

    payload = {
        "labelsByUnit": labels_by_unit,
        "mergeGroups": merge_group_lists,
    }
    return kcl.store_json(payload)

FigURLCuration

Bases: SpyglassMixin, Computed

Source code in src/spyglass/spikesorting/v1/figurl_curation.py
@schema
class FigURLCuration(SpyglassMixin, dj.Computed):
    """Computed table holding the FigURL generated for each selection row."""

    definition = """
    # URL to the FigURL for manual curation of a spike sorting.
    -> FigURLCurationSelection
    ---
    url: varchar(1000)
    """

    # NOTE(review): flags presumably consumed by SpyglassMixin — confirm.
    _use_transaction, _allow_insert = False, True

    def make(self, key: dict):
        """Generate a FigURL for manual curation of a spike sorting.

        Parameters
        ----------
        key : dict
            Primary key of a `FigURLCurationSelection` row. The generated
            URL is added to it under `url` before the row is inserted.
        """
        # FETCH
        query = (
            FigURLCurationSelection * CurationV1 * SpikeSortingSelection & key
        )
        (
            sorting_fname,
            object_id,
            recording_label,
            metrics_figurl,
        ) = query.fetch1(
            "analysis_file_name", "object_id", "recording_id", "metrics_figurl"
        )

        # DO
        sel_key = (FigURLCurationSelection & key).fetch1()
        # The full row already holds these attributes; reading them here
        # avoids two additional single-attribute queries.
        sorting_label = sel_key["sorting_id"]
        curation_uri = sel_key["curation_uri"]
        sorting_fpath = AnalysisNwbfile.get_abs_path(sorting_fname)
        recording = CurationV1.get_recording(sel_key)
        sorting = CurationV1.get_sorting(sel_key)

        # Map each requested metric name to {unit_id: value}.
        metric_dict = {}
        with pynwb.NWBHDF5IO(sorting_fpath, "r", load_namespaces=True) as io:
            nwbf = io.read()
            nwb_sorting = nwbf.objects[object_id].to_dataframe()
            unit_ids = nwb_sorting.index
            for metric in metrics_figurl:
                metric_dict[metric] = dict(zip(unit_ids, nwb_sorting[metric]))

        unit_metrics = _reformat_metrics(metric_dict)

        # TODO: figure out a way to specify the similarity metrics

        # Generate the figURL
        key["url"] = _generate_figurl(
            R=recording,
            S=sorting,
            initial_curation_uri=curation_uri,
            recording_label=recording_label,
            sorting_label=sorting_label,
            unit_metrics=unit_metrics,
        )

        # INSERT
        self.insert1(key, skip_duplicates=True)

    @classmethod
    def get_labels(cls, curation_json) -> Dict[int, List[str]]:
        """Uses kachery cloud to load curation json. Returns labelsByUnit.

        Unit ids, which are the keys of the loaded `labelsByUnit` mapping,
        are converted to int. An empty dict is returned when the entry is
        missing or empty.
        """

        labels_by_unit = kcl.load_json(curation_json).get("labelsByUnit")
        return (
            {
                int(unit_id): curation_label_list
                for unit_id, curation_label_list in labels_by_unit.items()
            }
            if labels_by_unit
            else {}
        )

    @classmethod
    def get_merge_groups(cls, curation_json) -> Dict:
        """Uses kachery cloud to load curation json. Returns mergeGroups.

        Returns the `mergeGroups` entry unchanged, or an empty dict when the
        key is absent.
        """
        # NOTE(review): `generate_curation_uri` stores `mergeGroups` as a list
        # of lists, so the `Dict` annotation and `{}` default look
        # inconsistent — kept as-is for backward compatibility; confirm.
        return kcl.load_json(curation_json).get("mergeGroups", {})

make(key)

Generate a FigURL for manual curation of a spike sorting.

Source code in src/spyglass/spikesorting/v1/figurl_curation.py
def make(self, key: dict):
    """Generate a FigURL for manual curation of a spike sorting.

    Parameters
    ----------
    key : dict
        Primary key of a `FigURLCurationSelection` row. The generated URL is
        added to it under `url` before the row is inserted.
    """
    # FETCH
    query = (
        FigURLCurationSelection * CurationV1 * SpikeSortingSelection & key
    )
    (
        sorting_fname,
        object_id,
        recording_label,
        metrics_figurl,
    ) = query.fetch1(
        "analysis_file_name", "object_id", "recording_id", "metrics_figurl"
    )

    # DO
    sel_key = (FigURLCurationSelection & key).fetch1()
    # The full row already holds these attributes; reading them here avoids
    # two additional single-attribute queries.
    sorting_label = sel_key["sorting_id"]
    curation_uri = sel_key["curation_uri"]
    sorting_fpath = AnalysisNwbfile.get_abs_path(sorting_fname)
    recording = CurationV1.get_recording(sel_key)
    sorting = CurationV1.get_sorting(sel_key)

    # Map each requested metric name to {unit_id: value}.
    metric_dict = {}
    with pynwb.NWBHDF5IO(sorting_fpath, "r", load_namespaces=True) as io:
        nwbf = io.read()
        nwb_sorting = nwbf.objects[object_id].to_dataframe()
        unit_ids = nwb_sorting.index
        for metric in metrics_figurl:
            metric_dict[metric] = dict(zip(unit_ids, nwb_sorting[metric]))

    unit_metrics = _reformat_metrics(metric_dict)

    # TODO: figure out a way to specify the similarity metrics

    # Generate the figURL
    key["url"] = _generate_figurl(
        R=recording,
        S=sorting,
        initial_curation_uri=curation_uri,
        recording_label=recording_label,
        sorting_label=sorting_label,
        unit_metrics=unit_metrics,
    )

    # INSERT
    self.insert1(key, skip_duplicates=True)

get_labels(curation_json) classmethod

Uses kachery cloud to load curation json. Returns labelsByUnit.

Source code in src/spyglass/spikesorting/v1/figurl_curation.py
@classmethod
def get_labels(cls, curation_json) -> Dict[int, List[str]]:
    """Load a curation JSON through kachery cloud and return its labels.

    The keys of the `labelsByUnit` entry are converted to int. An empty
    dict is returned when that entry is missing or empty.
    """
    curation = kcl.load_json(curation_json)
    raw_labels = curation.get("labelsByUnit")
    if not raw_labels:
        return {}

    labels = {}
    for unit_id, unit_labels in raw_labels.items():
        labels[int(unit_id)] = unit_labels
    return labels

get_merge_groups(curation_json) classmethod

Uses kachery cloud to load curation json. Returns mergeGroups.

Source code in src/spyglass/spikesorting/v1/figurl_curation.py
@classmethod
def get_merge_groups(cls, curation_json) -> Dict:
    """Load a curation JSON through kachery cloud and return `mergeGroups`.

    The entry is returned unchanged; an empty dict is returned when the key
    is absent.
    """
    # NOTE(review): `generate_curation_uri` stores `mergeGroups` as a list of
    # lists, so the `Dict` annotation and `{}` default look inconsistent —
    # confirm before changing.
    curation = kcl.load_json(curation_json)
    return curation.get("mergeGroups", {})