Skip to content

API

CitableDocument

A Citation's extract_citations() function relies on a CitableDocument.

Creates three main reusable lists:

list concept
@docketed_reports list of DocketReportCitation found in the text, excluding exceptional statutory dockets
@reports list of Report found in the text (which may already be included in @docketed_reports)
@undocketed_reports = @reports - @docketed_reports

Examples:

Python Console Session
>>> text_statutes = "Bar Matter No. 803, Jan. 1, 2000; Bar Matter No. 411, Feb. 1, 2000"
>>> len(CitableDocument(text=text_statutes).docketed_reports) # no citations, since these are 'statutory dockets'
0
>>> text_cites = "374 Phil. 1, 10-11 (1999) 1111 SCRA 1111; G.R. No. 147033, April 30, 2003; G.R. No. 147033, April 30, 2003, 374 Phil. 1, 600; ABC v. XYZ, G.R. Nos. 138570, 138572, 138587, 138680, 138698, October 10, 2000, 342 SCRA 449;  XXX, G.R. No. 31711, Sept. 30, 1971, 35 SCRA 190; Hello World, 1111 SCRA 1111; Y v. Z, 35 SCRA 190;"
>>> doc1 = CitableDocument(text=text_cites)
>>> len(doc1.docketed_reports)
4
>>> doc1.undocketed_reports
{'1111 SCRA 1111'}
>>> text = "<em>Gatchalian Promotions Talent Pool, Inc. v. Atty. Naldoza</em>, 374 Phil. 1, 10-11 (1999), citing: <em>In re Almacen</em>, 31 SCRA 562, 600 (1970).; People v. Umayam, G.R. No. 147033, April 30, 2003; <i>Bagong Alyansang Makabayan v. Zamora,</i> G.R. Nos. 138570, 138572, 138587, 138680, 138698, October 10, 2000, 342 SCRA 449; Villegas <em>v.</em> Subido, G.R. No. 31711, Sept. 30, 1971, 41 SCRA 190;"
>>> doc2 = CitableDocument(text=text)
>>> set(doc2.get_citations()) == {'GR No. 147033, Apr. 30, 2003', 'GR No. 138570, Oct. 10, 2000, 342 SCRA 449', 'GR No. 31711, Sep. 30, 1971, 41 SCRA 190', '374 Phil. 1', '31 SCRA 562'}
True
Source code in citation_utils/document.py
Python
@dataclass
class CitableDocument:
    """Creates three main reusable lists:

    list | concept
    :--:|:--:
    `@docketed_reports` | list of `DocketReportCitation` found in the text, excluding exceptional statutory dockets
    `@reports` | list of `Report` found in the text (which may already be included in `@docketed_reports`)
    `@undocketed_reports` | = `@reports` - `@docketed_reports`

    Examples:
        >>> text_statutes = "Bar Matter No. 803, Jan. 1, 2000; Bar Matter No. 411, Feb. 1, 2000"
        >>> len(CitableDocument(text=text_statutes).docketed_reports) # no citations, since these are 'statutory dockets'
        0
        >>> text_cites = "374 Phil. 1, 10-11 (1999) 1111 SCRA 1111; G.R. No. 147033, April 30, 2003; G.R. No. 147033, April 30, 2003, 374 Phil. 1, 600; ABC v. XYZ, G.R. Nos. 138570, 138572, 138587, 138680, 138698, October 10, 2000, 342 SCRA 449;  XXX, G.R. No. 31711, Sept. 30, 1971, 35 SCRA 190; Hello World, 1111 SCRA 1111; Y v. Z, 35 SCRA 190;"
        >>> doc1 = CitableDocument(text=text_cites)
        >>> len(doc1.docketed_reports)
        4
        >>> doc1.undocketed_reports
        {'1111 SCRA 1111'}
        >>> text = "<em>Gatchalian Promotions Talent Pool, Inc. v. Atty. Naldoza</em>, 374 Phil. 1, 10-11 (1999), citing: <em>In re Almacen</em>, 31 SCRA 562, 600 (1970).; People v. Umayam, G.R. No. 147033, April 30, 2003; <i>Bagong Alyansang Makabayan v. Zamora,</i> G.R. Nos. 138570, 138572, 138587, 138680, 138698, October 10, 2000, 342 SCRA 449; Villegas <em>v.</em> Subido, G.R. No. 31711, Sept. 30, 1971, 41 SCRA 190;"
        >>> doc2 = CitableDocument(text=text)
        >>> set(doc2.get_citations()) == {'GR No. 147033, Apr. 30, 2003', 'GR No. 138570, Oct. 10, 2000, 342 SCRA 449', 'GR No. 31711, Sep. 30, 1971, 41 SCRA 190', '374 Phil. 1', '31 SCRA 562'}
        True
    """  # noqa: E501

    text: str

    def __post_init__(self):
        # The three derived lists are computed once, eagerly, from the raw text.
        # `undocketed_reports` must come last: it reads `self.docketed_reports`.
        self.reports = list(Report.extract_reports(self.text))
        self.docketed_reports = list(self.get_docketed_reports(self.text))
        self.undocketed_reports = self.get_undocketed_reports()

    @classmethod
    def get_docketed_reports(
        cls, text: str, exclude_docket_rules: bool = True
    ) -> Iterator[DocketReport]:
        """Extract from `raw` text all raw citations which should include their `Docket` and `Report` component parts.
        This may however include statutory rules since some docket categories like AM and BM use this convention.
        To exclude statutory rules, a flag is included as a default.

        Examples:
            >>> cite = next(CitableDocument.get_docketed_reports("Bagong Alyansang Makabayan v. Zamora, G.R. Nos. 138570, 138572, 138587, 138680, 138698, October 10, 2000, 342 SCRA 449"))
            >>> cite.model_dump(exclude_none=True)
            {'publisher': 'SCRA', 'volume': '342', 'page': '449', 'volpubpage': '342 SCRA 449', 'context': 'G.R. Nos. 138570, 138572, 138587, 138680, 138698', 'category': 'GR', 'ids': '138570, 138572, 138587, 138680, 138698', 'docket_date': datetime.date(2000, 10, 10)}
            >>> statutory_text = "Bar Matter No. 803, Jan. 1, 2000"
            >>> next(CitableDocument.get_docketed_reports(statutory_text)) # default
            Traceback (most recent call last):
                ...
            StopIteration

        Args:
            text (str): Text to look for `Dockets` and `Reports`
            exclude_docket_rules (bool): When True (the default), matches for which
                `is_statutory_rule()` is truthy are skipped. When False, every match
                is yielded.

        Yields:
            Iterator[DocketReport]: Any of custom `Docket` with `Report` types, e.g. `CitationAC`, etc.
        """  # noqa: E501
        for search_func in (
            CitationAC.search,
            CitationAM.search,
            CitationOCA.search,
            CitationBM.search,
            CitationGR.search,
            CitationPET.search,
            CitationUDK.search,
            CitationJIB.search,
        ):
            # Each search function is applied to the text, each match yielded
            for result in search_func(text):
                # Only filter when asked to. The `yield` must stay outside the
                # flag check, otherwise disabling the flag would yield nothing.
                if exclude_docket_rules and is_statutory_rule(result):
                    continue
                yield result

    def get_undocketed_reports(self):
        """Steps:

        1. From a set of `uniq_reports` (see `self.reports`);
        2. Compare to reports found in `@docketed_reports`
        3. Limit reports to those _without_ an accompanying docket
        """
        uniq_reports = set(Report.get_unique(self.text))
        for cite in self.docketed_reports:
            # `discard` is a no-op when the report is absent (or None).
            uniq_reports.discard(cite.volpubpage)
        return uniq_reports

    def get_citations(self) -> Iterator[str]:
        """There are two main lists to evaluate:

        1. `@docketed_reports` - each includes a `Docket` (optionally attached to a `Report`)
        2. `@reports` - from the same text, just get `Report` objects.

        Can filter out `Report` objects not docketed and thus return
        a more succinct citation list which includes both constructs mentioned above but
        without duplicate `reports`.

        Yields:
            Iterator[str]: With docketed reports present, the string form of each,
                followed by the undocketed reports. Otherwise the string form of
                each item in `@reports`.
        """  # noqa: E501
        if self.docketed_reports:
            for doc_report_cite in self.docketed_reports:
                yield str(doc_report_cite)

            if self.undocketed_reports:
                yield from self.undocketed_reports  # already <str>
        else:
            if self.reports:
                for report in self.reports:
                    yield str(report)

Functions

get_citations()

There are two main lists to evaluate:

  1. @docketed_reports - each includes a Docket (optionally attached to a Report)
  2. @reports - from the same text, just get Report objects.

Can filter out Report objects not docketed and thus return a more succinct citation list which includes both constructs mentioned above but without duplicate reports.

Source code in citation_utils/document.py
Python
def get_citations(self) -> Iterator[str]:
    """Yield the citation strings of this document without duplicate reports.

    Two lists are consulted:

    1. `@docketed_reports` - each includes a `Docket` (optionally attached to a `Report`)
    2. `@reports` - plain `Report` objects from the same text.

    When at least one docketed report exists, the string form of each is
    yielded first, followed by the `@undocketed_reports` (reports with no
    docket attached). When none exists, the string form of every item in
    `@reports` is yielded instead.
    """  # noqa: E501
    docketed = self.docketed_reports
    if not docketed:
        # No dockets at all: fall back to the bare reports, if any.
        if self.reports:
            yield from map(str, self.reports)
        return

    yield from map(str, docketed)
    leftovers = self.undocketed_reports
    if leftovers:
        yield from leftovers  # already <str>

get_docketed_reports(text, exclude_docket_rules=True) classmethod

Extract from raw text all raw citations which should include their Docket and Report component parts. This may however include statutory rules since some docket categories like AM and BM use this convention. To exclude statutory rules, a flag is included as a default.

Examples:

Python Console Session
>>> cite = next(CitableDocument.get_docketed_reports("Bagong Alyansang Makabayan v. Zamora, G.R. Nos. 138570, 138572, 138587, 138680, 138698, October 10, 2000, 342 SCRA 449"))
>>> cite.model_dump(exclude_none=True)
{'publisher': 'SCRA', 'volume': '342', 'page': '449', 'volpubpage': '342 SCRA 449', 'context': 'G.R. Nos. 138570, 138572, 138587, 138680, 138698', 'category': 'GR', 'ids': '138570, 138572, 138587, 138680, 138698', 'docket_date': datetime.date(2000, 10, 10)}
>>> statutory_text = "Bar Matter No. 803, Jan. 1, 2000"
>>> next(CitableDocument.get_docketed_reports(statutory_text)) # default
Traceback (most recent call last):
    ...
StopIteration

Parameters:

Name Type Description Default
text str

Text to look for Dockets and Reports

required

Yields:

Type Description
DocketReport

Iterator[DocketReport]: Any of custom Docket with Report types, e.g. CitationAC, etc.

Source code in citation_utils/document.py
Python
@classmethod
def get_docketed_reports(
    cls, text: str, exclude_docket_rules: bool = True
) -> Iterator[DocketReport]:
    """Extract from `raw` text all raw citations which should include their `Docket` and `Report` component parts.
    This may however include statutory rules since some docket categories like AM and BM use this convention.
    To exclude statutory rules, a flag is included as a default.

    Examples:
        >>> cite = next(CitableDocument.get_docketed_reports("Bagong Alyansang Makabayan v. Zamora, G.R. Nos. 138570, 138572, 138587, 138680, 138698, October 10, 2000, 342 SCRA 449"))
        >>> cite.model_dump(exclude_none=True)
        {'publisher': 'SCRA', 'volume': '342', 'page': '449', 'volpubpage': '342 SCRA 449', 'context': 'G.R. Nos. 138570, 138572, 138587, 138680, 138698', 'category': 'GR', 'ids': '138570, 138572, 138587, 138680, 138698', 'docket_date': datetime.date(2000, 10, 10)}
        >>> statutory_text = "Bar Matter No. 803, Jan. 1, 2000"
        >>> next(CitableDocument.get_docketed_reports(statutory_text)) # default
        Traceback (most recent call last):
            ...
        StopIteration

    Args:
        text (str): Text to look for `Dockets` and `Reports`
        exclude_docket_rules (bool): When True (the default), matches for which
            `is_statutory_rule()` is truthy are skipped. When False, every match
            is yielded.

    Yields:
        Iterator[DocketReport]: Any of custom `Docket` with `Report` types, e.g. `CitationAC`, etc.
    """  # noqa: E501
    for search_func in (
        CitationAC.search,
        CitationAM.search,
        CitationOCA.search,
        CitationBM.search,
        CitationGR.search,
        CitationPET.search,
        CitationUDK.search,
        CitationJIB.search,
    ):
        # Each search function is applied to the text, each match yielded
        for result in search_func(text):
            # Only filter when asked to. The `yield` must stay outside the
            # flag check, otherwise disabling the flag would yield nothing.
            if exclude_docket_rules and is_statutory_rule(result):
                continue
            yield result

get_undocketed_reports()

Steps:

  1. From a set of uniq_reports (see self.reports);
  2. Compare to reports found in @docketed_reports
  3. Limit reports to those without an accompanying docket
Source code in citation_utils/document.py
Python
def get_undocketed_reports(self):
    """Return the unique reports that have no accompanying docket.

    Steps:

    1. Collect the unique reports found in `self.text`;
    2. Gather the `volpubpage` of every item in `@docketed_reports`;
    3. Subtract the latter from the former, leaving only docket-less reports.
    """
    found = set(Report.get_unique(self.text))
    already_docketed = {cite.volpubpage for cite in self.docketed_reports}
    return found - already_docketed

Docket Model

Bases: BaseModel

The Docket is the modern identifier of a Supreme Court decision.

It is based on a category, a serial id, and a date.

Field Type Description
context optional (str) Full text matched by the regex pattern
category optional (DocketCategory) See docket-category-model
ids optional (str) The serial number of the docket category
docket_date optional (date) The date associated with the docket
Sample Citation Category Serial Date
G.R. Nos. 138570, October 10, 2000 GR 138570 October 10, 2000
A.M. RTJ-12-2317 (Formerly OCA I.P.I. No. 10-3378-RTJ), Jan 1, 2000 AM RTJ-12-2317 Jan 1, 2000
A.C. No. 10179 (Formerly CBD 11-2985), March 04, 2014 AC 10179 Mar. 4, 2014

The Docket is often paired with a Report, which is the traditional identifier based on volume and page numbers.

Source code in citation_utils/dockets/models/docket_model.py
Python
class Docket(BaseModel):
    """
    The Docket is the modern identifier of a Supreme Court decision.

    It is based on a `category`, a `serial id`, and a `date`.

    Field | Type | Description
    --:|:--:|:--
    `context` | str | Full text matched by the regex pattern
    `category` | DocketCategory | See [docket-category-model][]
    `ids` | str | The serial number of the docket category
    `docket_date` | date | The date associated with the docket

    Sample Citation | Category | Serial | Date
    :-- |:--:|:--:|:--:
    _G.R. Nos. 138570, October 10, 2000_ | GR | 138570 | October 10, 2000
    _A.M. RTJ-12-2317 (Formerly OCA I.P.I. No. 10-3378-RTJ), Jan 1, 2000_ | AM | RTJ-12-2317 |Jan 1, 2000
    _A.C. No. 10179 (Formerly CBD 11-2985), March 04, 2014_ | AC | 10179 | Mar. 4, 2014

    The Docket is often paired with a Report, which is the traditional
    identifier based on volume and page numbers.
    """  # noqa: E501

    model_config = ConfigDict(use_enum_values=True)
    context: str = Field(..., description="Full text matched by regex pattern.")
    category: DocketCategory = Field(..., description="e.g. General Register, etc.")
    ids: str = Field(..., description="Ok for a csv token, e.g. '24141, 14234, 12'")
    docket_date: date = Field(..., description="Either in UK, US styles")

    def __repr__(self) -> str:
        return f"<Docket: {self.category} {self.serial_text}, {self.formatted_date}>"

    def __str__(self) -> str:
        if self.serial_text:
            return f"{self.category} {self.serial_text}, {self.formatted_date}"
        return "No proper string detected."

    def __eq__(self, other: object) -> bool:
        """Compare two dockets on category name, first id and ISO docket date.

        Objects that do not expose the compared attributes yield
        `NotImplemented` (so Python falls back to its default handling)
        instead of raising `AttributeError`.
        """
        try:
            # Only the attribute lookups on `other` are guarded; duck-typed
            # objects exposing the same attributes still compare as before.
            other_category = other.category.name
            other_first_id = other.first_id
            other_date = other.docket_date.isoformat()
        except AttributeError:
            return NotImplemented
        opt_1 = is_eq(self.category.name, other_category)
        opt_2 = is_eq(self.first_id, other_first_id)
        opt_3 = is_eq(self.docket_date.isoformat(), other_date)
        return all([opt_1, opt_2, opt_3])

    @property
    def serial_text(self) -> str:
        """From raw `ids`, get the `cleaned_ids`, and of these `cleaned_ids`,
            extract the `@first_id` found to deal with compound ids, e.g.
            ids separated by 'and' and ','

        Returns:
            str: Singular text identifier
        """
        if x := self.first_id or self.ids:
            # Prefer the cleaned variant when `gr_prefix_clean` returns one.
            if adjust := gr_prefix_clean(x):
                return adjust
        return x

    @property
    def first_id(self) -> str:
        """Get first bit from list of separated ids, when possible.

        Separators are tried in a fixed order; the first separator from that
        list which is present in `ids` (and leaves a non-empty leading part)
        decides the split, regardless of where it occurs in the string.

        Returns:
            str: First id found
        """

        def first_exists(char: str, text: str):
            """If a `char` exists in the `text`, split on this value."""
            return text.split(char)[0] if char in text else None

        for char in ["/", ",", ";", " and ", " AND ", "&"]:
            if res := first_exists(char, self.ids):
                return res
        return self.ids

    @property
    def formatted_date(self) -> str | None:
        """`docket_date` rendered with `DOCKET_DATE_FORMAT`, or None if unset."""
        if self.docket_date:
            return self.docket_date.strftime(DOCKET_DATE_FORMAT)
        return None

Attributes

first_id: str property

Get first bit from list of separated ids, when possible.

Returns:

Name Type Description
str str

First id found

serial_text: str property

From raw ids, get the cleaned_ids, and of these cleaned_ids, extract the @first_id found to deal with compound ids, e.g. ids separated by 'and' and ','

Returns:

Name Type Description
str str

Singular text identifier

Docket Category

Docket Category Model

Bases: StrEnum

Common docket references involving Philippine Supreme Court decisions.

Name Value
GR General Register
AM Administrative Matter
AC Administrative Case
BM Bar Matter
PET Presidential Electoral Tribunal
OCA Office of the Court Administrator
JIB Judicial Integrity Board
UDK Undocketed

Complication: These categories do not always represent decisions. For instance, there are AM and BM docket numbers that represent rules rather than decisions.

Source code in citation_utils/dockets/models/docket_category.py
Python
class DocketCategory(StrEnum):
    """Common docket references involving Philippine Supreme Court decisions.

    Name | Value
    :--|:--
    `GR` | General Register
    `AM` | Administrative Matter
    `AC` | Administrative Case
    `BM` | Bar Matter
    `PET` | Presidential Electoral Tribunal
    `OCA` | Office of the Court Administrator
    `JIB` | Judicial Integrity Board
    `UDK` | Undocketed

    Complication: These categories do not always represent decisions. For instance,
    there are are `AM` and `BM` docket numbers that represent rules rather
    than decisions.
    """

    GR = "General Register"
    AM = "Administrative Matter"
    AC = "Administrative Case"
    BM = "Bar Matter"
    PET = "Presidential Electoral Tribunal"
    OCA = "Office of the Court Administrator"
    JIB = "Judicial Integrity Board"
    UDK = "Undocketed"

    def __str__(self):
        return self.name

    def __repr__(self) -> str:
        """Uses name of member `gr` instead of Enum default
        `<DocketCategory.GR: 'General Register'>`. It becomes to
        use the following conventions:

        Examples:
            >>> DocketCategory['GR']
            'GR'
            >>> DocketCategory.GR
            'GR'

        Returns:
            str: The value of the Enum name
        """
        return str.__repr__(self.name.upper())

Functions

__repr__()

Uses the name of the member (e.g. GR) instead of the Enum default <DocketCategory.GR: 'General Register'>. This makes the following conventions possible:

Examples:

Python Console Session
>>> DocketCategory['GR']
'GR'
>>> DocketCategory.GR
'GR'

Returns:

Name Type Description
str str

The value of the Enum name

Source code in citation_utils/dockets/models/docket_category.py
Python
def __repr__(self) -> str:
    """Show the quoted, upper-cased member name rather than the Enum
    default `<DocketCategory.GR: 'General Register'>`.

    Examples:
        >>> DocketCategory['GR']
        'GR'
        >>> DocketCategory.GR
        'GR'

    Returns:
        str: The repr of the upper-cased member name
    """
    upper_name = self.name.upper()
    return repr(upper_name)

Docket CitationConstructor

Although the different category docket models share a similar configuration, the regex strings involved are different for each, prompting the need for a preparatory constructor class:

Bases: BaseModel

Prefatorily, regex strings are defined so that a re.Pattern object can take advantage of the "group_name" assigned in the string.

These are the docket styles with regex strings predefined:

  1. General Register
  2. Administrative Matter
  3. Administrative Case
  4. Bar Matter
  5. Office of the Court Administrator
  6. Presidential Electoral Tribunal
  7. Judicial Integrity Board
  8. Undocketed Case

The CitationConstructor formalizes the assigned group names into their respective fields.

Relatedly, it takes advantage of the citation_date and the citation_report libraries in generating the main @pattern since the regex strings above are only concerned with the key num id formula part of the docket, e.g. GR No. 123... but not the accompanying date and report.

Source code in citation_utils/dockets/models/constructor.py
Python
class CitationConstructor(BaseModel):
    """Prefatorily, regex strings are defined so that a
    `re.Pattern` object can take advantage of the "group_name"
    assigned in the string.

    These are the docket styles with regex strings predefined:

    1. General Register
    2. Administrative Matter
    3. Administrative Case
    4. Bar Matter
    5. Office of the Court Administrator
    6. Presidential Electoral Tribunal
    7. Judicial Integrity Board
    8. Undocketed Case

    The CitationConstructor formalizes the assigned group names into
    their respective fields.

    Relatedly, it takes advantage of
    the `citation_date` and the `citation_report` libraries in
    generating the main `@pattern` since the regex strings above
    are only concerned with the `key` `num` `id` formula part
    of the docket, e.g. `GR` `No.` `123`... but not the accompanying
    date and report.
    """

    label: str = Field(
        ...,
        title="Docket Label",
        description="e.g. General Register, Administrative Matter",
    )
    short_category: str = Field(
        ..., title="Docket Category Shorthand", description="e.g. GR, AM"
    )
    group_name: str = Field(
        ...,
        title="Regex Group Name",
        description=(
            # The trailing space keeps the two concatenated literals from
            # fusing into "theMatch".
            "e.g. 'gr_test_phrase' identifies that portion of the "
            "Match object that should be associated with the label."
        ),
    )
    init_name: str = Field(
        ...,
        title="Regex Initial Group Name",
        description="e.g. gr_mid, am_init; see .regexes for other group names",
    )
    docket_regex: str = Field(
        ...,
        title="Regex Expression Proper",
        description=(
            "The full regex expression which includes the groupnames referred to above."
        ),
    )
    key_regex: str = Field(
        ...,
        title="Regex Key",
        description="Regex portion to get the serial ids",
    )
    num_regex: str = Field(
        ...,
        title="Regex Num",
        description="Regex portion for the num keyword to get the serial ids",
    )

    @property
    def pattern(self) -> re.Pattern:
        """Construct the regex string and generate a full Pattern object from:

        1. `docket_regex`,
        2. `docket_date` defined in the citation-date library
        3. an optional `REPORT_REGEX` defined in the citation-report library

        The pattern is compiled case-insensitively and in verbose mode
        (`re.I | re.X`).

        Returns:
            Pattern: Combination of Docket and Report styles.
        """
        return re.compile(
            "".join(
                [
                    rf"{self.docket_regex}",
                    rf"(?P<extra_phrase>{formerly}?{pp}?){DOCKET_DATE_REGEX}",
                    rf"(?P<opt_report>\,\s*{REPORT_REGEX})?",
                ]
            ),
            re.I | re.X,
        )

    @property
    def key_num_pattern(self) -> re.Pattern:
        """Unlike full @pattern, this regex compiled object is limited to
        just the key and number elements, e.g. "GR No. 123" or "BP Blg. 45"
        """
        regex = rf"{self.key_regex}({self.num_regex})?"
        return re.compile(regex, re.I | re.X)

    def detect(self, raw: str) -> Iterator[dict[str, Any]]:
        """Logic: if `self.init_name` Match group exists, get entire
        regex based on `self.group_name`, extract subgroups which will
        consist of `Docket` and `Report` parts.

        A match is only yielded when both a non-empty serial id and a
        decodable docket date are found.

        Args:
            raw (str): Text to evaluate

        Yields:
            Iterator[dict[str, Any]]: A dict that can fill up a Docket + Report pydantic BaseModel
        """  # noqa: E501
        matches = self.pattern.finditer(raw)
        # `key_num_pattern` is a property that builds a pattern on each access;
        # read it once rather than once per match.
        key_num_pattern = self.key_num_pattern
        for match in matches:
            if not match.group(self.init_name):
                continue
            ctx = match.group(self.group_name).strip(", ")
            if not ctx:
                continue
            # Remove the key/num prefix from the context to isolate the serial ids.
            raw_id = cull_extra(key_num_pattern.sub("", ctx))
            ids = raw_id.strip("()[] .,;")
            raw_date = match.group("docket_date")
            date_found = decode_date(raw_date, True)
            if ids and date_found:
                yield dict(
                    context=ctx,
                    short_category=self.short_category,
                    category=self.label,
                    ids=ids,
                    docket_date=date_found,
                    publisher=get_publisher_label(match),
                    volpubpage=match.group("volpubpage"),
                    volume=match.group("volume"),
                    page=match.group("page"),
                )

Attributes

key_num_pattern: re.Pattern property

Unlike full @pattern, this regex compiled object is limited to just the key and number elements, e.g. "GR No. 123" or "BP Blg. 45"

pattern: re.Pattern property

Construct the regex string and generate a full Pattern object from:

  1. docket_regex,
  2. docket_date defined in the citation-date library
  3. an optional REPORT_REGEX defined in the citation-report library

Returns:

Name Type Description
Pattern re.Pattern

Combination of Docket and Report styles.

Functions

detect(raw)

Logic: if self.init_name Match group exists, get entire regex based on self.group_name, extract subgroups which will consist of Docket and Report parts.

Parameters:

Name Type Description Default
raw str

Text to evaluate

required

Yields:

Type Description
dict[str, Any]

Iterator[dict[str, Any]]: A dict that can fill up a Docket + Report pydantic BaseModel

Source code in citation_utils/dockets/models/constructor.py
Python
def detect(self, raw: str) -> Iterator[dict[str, Any]]:
    """Logic: if `self.init_name` Match group exists, get entire
    regex based on `self.group_name`, extract subgroups which will
    consist of `Docket` and `Report` parts.

    A match is only yielded when both a non-empty serial id and a
    decodable docket date are found.

    Args:
        raw (str): Text to evaluate

    Yields:
        Iterator[dict[str, Any]]: A dict that can fill up a Docket + Report pydantic BaseModel
    """  # noqa: E501
    matches = self.pattern.finditer(raw)
    # `key_num_pattern` is loop-invariant; read it once rather than once per match.
    key_num_pattern = self.key_num_pattern
    for match in matches:
        if not match.group(self.init_name):
            continue
        ctx = match.group(self.group_name).strip(", ")
        if not ctx:
            continue
        # Remove the key/num prefix from the context to isolate the serial ids.
        raw_id = cull_extra(key_num_pattern.sub("", ctx))
        ids = raw_id.strip("()[] .,;")
        raw_date = match.group("docket_date")
        date_found = decode_date(raw_date, True)
        if ids and date_found:
            yield dict(
                context=ctx,
                short_category=self.short_category,
                category=self.label,
                ids=ids,
                docket_date=date_found,
                publisher=get_publisher_label(match),
                volpubpage=match.group("volpubpage"),
                volume=match.group("volume"),
                page=match.group("page"),
            )