Skip to content

Rule Model

Bases: BaseModel

A Rule is detected if it matches either:

  1. Named Patterns or
  2. Serial Patterns

Each rule implies:

  1. Previous validation via regex strings
  2. Path to an intended local directory
  3. Extractable content
  4. Serial title generated by StatuteSerialCategory.serialize()
  5. Countability via a collections.Counter built-in
Source code in statute_patterns/components/rule.py
Python
class Rule(BaseModel):
    """A `Rule` is detected if it matches either:

    1. [`Named Patterns`][named-pattern] or
    2. [`Serial Patterns`][serial-pattern]

    Each rule implies:

    1. Previous validation via regex strings
    2. Path to an intended local directory
    3. Extractable content
    4. Serial title generated by [`StatuteSerialCategory.serialize()`][statute_patterns.components.category.StatuteSerialCategory.serialize]
    5. Countability via a `collections.Counter` built-in
    """  # noqa: E501

    cat: StatuteSerialCategory = Field(
        ...,
        title="Statute Category",
        description=(
            "Classification under the limited StatuteSerialCategory taxonomy."
        ),
    )
    id: constr(to_lower=True) = Field(  # type: ignore
        ...,
        title="Serial Identifier",
        description=(
            "Limited inclusion of identifiers, e.g. only a subset of Executive"
            " Orders, Letters of Instruction, Spanish Codes will be permitted."
        ),
    )

    class Config:
        use_enum_values = True

    def __hash__(self):
        """Pydantic models are [not hashable by default](https://github.com/pydantic/pydantic/issues/1303#issuecomment-599712964).
        It is implemented here to take advantage of `collections.Counter` which works only on objects with a `__hash__`. This is the
        basis of [`count_rules()`][count-rules].

        The hash combines the concrete class with every value found in the
        instance's `__dict__`."""  # noqa: E501
        return hash((type(self),) + tuple(self.__dict__.values()))

    @validator("cat", pre=True)
    def category_in_lower_case(cls, v):
        """Lower-case the raw `cat` input and convert it to a
        `StatuteSerialCategory` before the field's own validation runs."""
        return StatuteSerialCategory(v.lower())

    @validator("id", pre=True)
    def serial_id_lower(cls, v):
        """Lower-case the raw `id` input before the field's own validation runs."""
        return v.lower()

    @classmethod
    def get_details(cls, details_path: Path):
        """Build the statute details for the rule found at `details_path`.

        Assumes a properly structured path where the third parent of the
        details file is the statutes base folder, e.g. for
        `/statutes/ra/386/details.yaml` the third parent is `/statutes`.
        The rule is created from the details path via `from_path()`; the rule
        and the base folder are then handed to `StatuteDetails.from_rule()`.

        Returns `None` when `from_path()` does not yield a rule.
        """
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid a circular import -- TODO confirm).
        from .details import StatuteDetails

        if rule := cls.from_path(details_path):
            statute_path = details_path.parent.parent.parent
            return StatuteDetails.from_rule(rule, statute_path)
        return None

    @classmethod
    def from_path(cls, details_path: Path):
        """Construct rule from a properly structured statute's `details.yaml` file.

        The expected layout is `<base>/<category>/<serial-id>/<DETAILS_FILE>`:
        the immediate parent folder supplies the `id` and the folder above it
        supplies the `cat`.

        Returns `None` when the file name of `details_path` is not
        `DETAILS_FILE`.
        """
        if details_path.name != DETAILS_FILE:
            return None

        statute_dir = details_path.parent
        # Use `.name`, not `.stem`: these are folder names, and `.stem` would
        # cut off everything after the last dot (e.g. `1.5` -> `1`), so the
        # rule would no longer match the folder that `get_path()` builds from
        # the full `id`.
        return cls(
            cat=StatuteSerialCategory(statute_dir.parent.name),
            id=statute_dir.name,
        )

    @property
    def serial_title(self):
        """Result of `StatuteSerialCategory.serialize()` applied to this
        rule's `id`, using the category built from `self.cat`."""
        return StatuteSerialCategory(self.cat).serialize(self.id)

    def get_path(self, base_path: Path = STATUTE_PATH) -> Path | None:
        """In most cases, there will only be one path, e.g.
        path/to/statutes/ra/386 where:

        1. path/to/statutes = base_path
        2. 'ra' is the category
        3. '386' is the id.

        Returns the folder `base_path / cat / id` if it exists, else `None`.
        """
        target = base_path / self.cat / self.id
        if target.exists():
            return target
        return None

    def get_paths(self, base_path: Path = STATUTE_PATH) -> list[Path]:
        """
        Ordinarily, the following directory structure would suffice
        in generating the path to a unique statute:

        ```yaml
        /statutes
            /statute-category
                /statute-serial-id
                    details.yaml # unique details here

        ```

        This is not true in complex statutes.

        To simplify, imagine Statutes A, B and C have the same
        category and identifier but refer to different documents:

        ```yaml
        /statutes
            /statute-category
                /statute-serial-id # dated Jan. 1, 2000
                    details.yaml # we'd need a different `statute-serial-id`
                /statute-serial-id # dated Jan. 10, 2000
                    details.yaml # we'd need a different `statute-serial-id`
        ```

        Because of this dilemma, we introduce a digit in the creation of statute
        folders referring to more than one variant of the intended document.

        So in the case of `/statutes/rule_am/`, let's consider `00-5-03-sc`.
        This should be a valid statute under `self.get_path()`.

        However, since there exist 2 variants, we need to rename the original
        folder to accommodate 2 distinct documents:

        ```yaml
        /statutes
            /rule_am
                /00-5-03-sc-1
                    details.yaml # unique
                /00-5-03-sc-2
                    details.yaml # unique
        ```

        Unlike `get_path()` which only retrieves one Path, all Paths will be retrieved
        using the plural form of the function `self.get_paths()`.

        Returns the matching variant folders (those named `<id>-*` that
        contain a `DETAILS_FILE`), sorted by path; an empty list if none match.
        """
        category_dir = base_path / self.cat
        details_files = category_dir.glob(f"{self.id}-*/{DETAILS_FILE}")
        # NOTE(review): the pattern accepts any suffix after `<id>-`, not only
        # digits -- confirm that no unrelated folder shares this prefix.
        # Glob order depends on the filesystem; sort so callers always see the
        # variants in the same order.
        return sorted(
            details_file.parent
            for details_file in details_files
            if details_file.exists()
        )

    def extract_folders(
        self, base_path: Path = STATUTE_PATH
    ) -> Iterator[Path]:
        """Using the `category` and `id` of the object,
        get the possible folder paths.

        Yields the single folder from `get_path()` when it exists; otherwise
        yields every variant folder from `get_paths()` (possibly none).
        """
        if folder := self.get_path(base_path):
            yield folder
        else:
            if folders := self.get_paths(base_path):
                yield from folders

    def units_path(self, statute_folder: Path) -> Path | None:
        """There are two kinds of unit files:

        1. the preferred / customized variant with the filename convention
            `statute-category` + `statute-serial-id` + `.yaml`, e.g. `ra386.yaml`
        2. the one scraped which is the default in the absence of a preferred
            variant, e.g. `units.yaml`

        Returns the preferred file if it exists in `statute_folder`, else the
        default file if it exists, else `None`.
        """
        preferred = statute_folder / f"{self.cat}{self.id}.yaml"
        if preferred.exists():
            return preferred

        default = statute_folder / "units.yaml"
        if default.exists():
            return default

        return None

Functions

__hash__()

Pydantic models are not hashable by default. It is implemented here to take advantage of collections.Counter which works only on objects with a hash. This is the basis of count_rules().

Source code in statute_patterns/components/rule.py
Python
def __hash__(self):
    """Make the model hashable so it can be tallied by `collections.Counter`.

    Pydantic models are [not hashable by default](https://github.com/pydantic/pydantic/issues/1303#issuecomment-599712964),
    and `collections.Counter` only works on objects with a `__hash__`. This is the
    basis of [`count_rules()`][count-rules].

    The hash is taken over the concrete class followed by every value stored
    in the instance's `__dict__`."""  # noqa: E501
    attribute_values = self.__dict__.values()
    return hash((type(self), *attribute_values))

extract_folders(base_path=STATUTE_PATH)

Using the category and id of the object, get the possible folder paths.

Source code in statute_patterns/components/rule.py
Python
def extract_folders(
    self, base_path: Path = STATUTE_PATH
) -> Iterator[Path]:
    """Yield the folder(s) that may hold this rule's statute.

    The `category` and `id` of the object are first resolved to a single
    folder via `get_path()`; only when that folder is missing are the
    variant folders from `get_paths()` yielded instead (possibly none)."""
    single_folder = self.get_path(base_path)
    if single_folder:
        yield single_folder
        return

    # An empty list simply yields nothing.
    yield from self.get_paths(base_path)

from_path(details_path) classmethod

Construct rule from a properly structured statute's details.yaml file.

Source code in statute_patterns/components/rule.py
Python
@classmethod
def from_path(cls, details_path: Path):
    """Construct rule from a properly structured statute's `details.yaml` file.

    The expected layout is `<base>/<category>/<serial-id>/<DETAILS_FILE>`:
    the immediate parent folder supplies the `id` and the folder above it
    supplies the `cat`.

    Returns `None` when the file name of `details_path` is not
    `DETAILS_FILE`.
    """
    if details_path.name != DETAILS_FILE:
        return None

    statute_dir = details_path.parent
    # Use `.name`, not `.stem`: these are folder names, and `.stem` would cut
    # off everything after the last dot (e.g. `1.5` -> `1`), so the rule would
    # no longer match the folder that `get_path()` builds from the full `id`.
    return cls(
        cat=StatuteSerialCategory(statute_dir.parent.name),
        id=statute_dir.name,
    )

get_details(details_path) classmethod

Assumes a properly structured path with three path parents from details.yaml, e.g. for the path /statutes/ra/386/details.yaml, the third parent is /statutes. Will create the rule based on the details path and pull data from other related paths to generate the details of the rule.

Source code in statute_patterns/components/rule.py
Python
@classmethod
def get_details(cls, details_path: Path):
    """Build the statute details for the rule found at `details_path`.

    Assumes a properly structured path where the third parent of the details
    file is the statutes base folder, e.g. for
    `/statutes/ra/386/details.yaml` the third parent is `/statutes`. The rule
    is created from the details path, then it and the base folder are passed
    to `StatuteDetails.from_rule()` to generate the details of the rule.

    Returns `None` when `from_path()` does not yield a rule."""
    from .details import StatuteDetails

    rule = cls.from_path(details_path)
    if not rule:
        return None

    category_dir = details_path.parent.parent
    return StatuteDetails.from_rule(rule, category_dir.parent)

get_path(base_path=STATUTE_PATH)

In most cases, there will only be one path to path/to/statutes/ra/386 where:

  1. path/to/statutes = base_path
  2. 'ra' is the category
  3. '386' is the id.
Source code in statute_patterns/components/rule.py
Python
def get_path(self, base_path: Path = STATUTE_PATH) -> Path | None:
    """Return the statute's folder if it exists, otherwise `None`.

    In most cases, there will only be one path, e.g.
    path/to/statutes/ra/386 where:

    1. path/to/statutes = base_path
    2. 'ra' is the category
    3. '386' is the id.
    """
    candidate = base_path / self.cat / self.id
    return candidate if candidate.exists() else None

get_paths(base_path=STATUTE_PATH)

Ordinarily, the following directory structure would suffice in generating the path to a unique statute:

YAML
/statutes
    /statute-category
        /statute-serial-id
            details.yaml # unique details here

This is not true in complex statutes.

To simplify, imagine Statutes A, B and C have the same category and identifier but refer to different documents:

YAML
/statutes
    /statute-category
        /statute-serial-id # dated Jan. 1, 2000
            details.yaml # we'd need a different `statute-serial-id`
        /statute-serial-id # dated Jan. 10, 2000
            details.yaml # we'd need a different `statute-serial-id`

Because of this dilemma, we introduce a digit in the creation of statute folders referring to more than one variant of the intended document.

So in the case of /statutes/rule_am/, let's consider 00-5-03-sc. This should be a valid statute under self.get_path().

However, since there exist 2 variants, we need to rename the original folder to accommodate 2 distinct documents:

YAML
/statutes
    /rule_am
        /00-5-03-sc-1
            details.yaml # unique
        /00-5-03-sc-2
            details.yaml # unique

Unlike get_path() which only retrieves one Path, all Paths will be retrieved using the plural form of the function self.get_paths()

Source code in statute_patterns/components/rule.py
Python
def get_paths(self, base_path: Path = STATUTE_PATH) -> list[Path]:
    """
    Ordinarily, the following directory structure would suffice
    in generating the path to a unique statute:

    ```yaml
    /statutes
        /statute-category
            /statute-serial-id
                details.yaml # unique details here

    ```

    This is not true in complex statutes.

    To simplify, imagine Statutes A, B and C have the same
    category and identifier but refer to different documents:

    ```yaml
    /statutes
        /statute-category
            /statute-serial-id # dated Jan. 1, 2000
                details.yaml # we'd need a different `statute-serial-id`
            /statute-serial-id # dated Jan. 10, 2000
                details.yaml # we'd need a different `statute-serial-id`
    ```

    Because of this dilemma, we introduce a digit in the creation of statute
    folders referring to more than one variant of the intended document.

    So in the case of `/statutes/rule_am/`, let's consider `00-5-03-sc`.
    This should be a valid statute under `self.get_path()`.

    However, since there exist 2 variants, we need to rename the original
    folder to accommodate 2 distinct documents:

    ```yaml
    /statutes
        /rule_am
            /00-5-03-sc-1
                details.yaml # unique
            /00-5-03-sc-2
                details.yaml # unique
    ```

    Unlike `get_path()` which only retrieves one Path, all Paths will be retrieved
    using the plural form of the function `self.get_paths()`.

    Returns the matching variant folders (those named `<id>-*` that contain a
    `DETAILS_FILE`), sorted by path; an empty list if none match.
    """
    category_dir = base_path / self.cat
    details_files = category_dir.glob(f"{self.id}-*/{DETAILS_FILE}")
    # NOTE(review): the pattern accepts any suffix after `<id>-`, not only
    # digits -- confirm that no unrelated folder shares this prefix.
    # Glob order depends on the filesystem; sort so callers always see the
    # variants in the same order.
    return sorted(
        details_file.parent
        for details_file in details_files
        if details_file.exists()
    )

units_path(statute_folder)

There are two kinds of unit files:

  1. the preferred / customized variant with the filename convention statute-category + statute-serial-id + .yaml, e.g. ra386.yaml
  2. the one scraped which is the default in the absence of a preferred variant, e.g. units.yaml
Source code in statute_patterns/components/rule.py
Python
def units_path(self, statute_folder: Path) -> Path | None:
    """There are two kinds of unit files:

    1. the preferred / customized variant with the filename convention
        `statute-category` + `statute-serial-id` + `.yaml`, e.g. `ra386.yaml`
    2. the one scraped which is the default in the absence of a preferred
        variant, e.g. `units.yaml`
    """
    preferred = statute_folder / f"{self.cat}{self.id}.yaml"
    if preferred.exists():
        return preferred

    default = statute_folder / "units.yaml"
    if default.exists():
        return default

    return None