Skip to content

Doc Meta Info

Stores the information associated with each doc item and manages the reference relationships between them.

DocItem dataclass

Represents a documentation item within a code repository.

This class stores information about various elements in the codebase, such as functions, classes, and modules, along with their relationships and associated metadata for documentation generation.

Source code in repo_agent/doc_meta_info.py
@dataclass
class DocItem:
    """
    Represents a documentation item within a code repository.

    This class stores information about various elements in the codebase,
    such as functions, classes, and modules, along with their relationships
    and associated metadata for documentation generation.
    """

    item_type: DocItemType = DocItemType._class_function
    item_status: DocItemStatus = DocItemStatus.doc_has_not_been_generated
    obj_name: str = ""  # name of the code object this item documents
    code_start_line: int = -1
    code_end_line: int = -1
    # Fixed annotation: ``ast.__ast.stmt`` is not a valid attribute path;
    # the public AST statement base class is ``ast.stmt``.
    source_node: Optional[ast.stmt] = None
    md_content: List[str] = field(default_factory=list)
    content: Dict[Any, Any] = field(default_factory=dict)
    children: Dict[str, "DocItem"] = field(default_factory=dict)
    # ``Any[DocItem]`` is not valid typing syntax; these are optional links.
    father: Optional["DocItem"] = None
    depth: int = 0
    tree_path: List["DocItem"] = field(default_factory=list)
    max_reference_ansce: Optional["DocItem"] = None
    reference_who: List["DocItem"] = field(default_factory=list)
    who_reference_me: List["DocItem"] = field(default_factory=list)
    special_reference_type: List[bool] = field(default_factory=list)
    reference_who_name_list: List[str] = field(default_factory=list)
    who_reference_me_name_list: List[str] = field(default_factory=list)
    has_task: bool = False
    multithread_task_id: int = -1

    @staticmethod
    def has_ans_relation(now_a: "DocItem", now_b: "DocItem"):
        """
        Determine whether one DocItem is an ancestor of the other.

        Args:
            now_a: The first DocItem.
            now_b: The second DocItem.

        Returns:
            The item that lies on the other's root-to-node path (i.e. the
            ancestor), or None if neither is an ancestor of the other.
        """
        if now_b in now_a.tree_path:
            return now_b
        if now_a in now_b.tree_path:
            return now_a
        return None

    def get_travel_list(self):
        """
        Flatten the subtree rooted at this node into a pre-order list.

        Returns:
            list: This node followed by all of its descendants.
        """
        now_list = [self]
        for child in self.children.values():
            now_list += child.get_travel_list()
        return now_list

    def check_depth(self):
        """
        Compute and cache the depth of the subtree rooted at this node.

        The depth is the number of edges on the longest path from this node
        down to a leaf; a leaf therefore has depth 0. The result is stored
        on ``self.depth`` as a side effect.

        Returns:
            int: The depth of this subtree.
        """
        if not self.children:
            self.depth = 0
            return self.depth
        self.depth = 1 + max(
            child.check_depth() for child in self.children.values()
        )
        return self.depth

    def parse_tree_path(self, now_path):
        """
        Record the root-to-node path on this node and all descendants.

        Args:
            now_path: The list of ancestors accumulated so far.

        Returns:
            None
        """
        self.tree_path = now_path + [self]
        for child in self.children.values():
            child.parse_tree_path(self.tree_path)

    def get_file_name(self):
        """
        Return the path of the file containing this item, ending in ``.py``.

        Returns:
            str: The full name truncated after the first ``.py`` occurrence,
            e.g. ``"pkg/mod.py"`` for ``"pkg/mod.py/func"``.
        """
        full_name = self.get_full_name()
        return full_name.split(".py")[0] + ".py"

    def get_full_name(self, strict=False):
        """
        Build the hierarchical name of this object by walking up its parents.

        Args:
            strict: If True, use the key under which each node is registered
                in its parent's ``children`` mapping; when that key differs
                from the node's own name, append '(name_duplicate_version)'.

        Returns:
            str: Path components joined by '/', excluding the root.
        """
        if self.father is None:
            return self.obj_name
        name_list = []
        now = self
        while now is not None:
            self_name = now.obj_name
            # Bug fix: look the node up in the *current* node's parent,
            # not always in ``self.father`` as the original code did.
            if strict and now.father is not None:
                for name, item in now.father.children.items():
                    if item is now:
                        self_name = name
                        break
                if self_name != now.obj_name:
                    self_name = self_name + "(name_duplicate_version)"
            name_list = [self_name] + name_list
            now = now.father
        return "/".join(name_list[1:])

    def find(self, recursive_file_path: list) -> Optional["DocItem"]:
        """
        Walk down the repository tree following the given path components.

        Args:
            recursive_file_path: Ordered list of child keys (directory or
                item names) leading from the repository root to the target.

        Returns:
            Optional[DocItem]: The item at the end of the path, or None if
            any component is missing.
        """
        assert self.item_type == DocItemType._repo
        now = self
        for part in recursive_file_path:
            if part not in now.children:
                return None
            now = now.children[part]
        return now

    @staticmethod
    def check_has_task(now_item: "DocItem", ignore_list: Optional[List[str]] = None):
        """
        Recursively flag items (and their ancestors) that need doc work.

        Args:
            now_item: The subtree root to check.
            ignore_list: Paths to skip when deciding whether generation is
                needed. Defaults to an empty list (the original used a
                mutable ``[]`` default, a Python anti-pattern).

        Returns:
            None
        """
        if ignore_list is None:
            ignore_list = []
        if need_to_generate(now_item, ignore_list=ignore_list):
            now_item.has_task = True
        for child in now_item.children.values():
            DocItem.check_has_task(child, ignore_list)
            now_item.has_task = now_item.has_task or child.has_task

    def print_recursive(
        self,
        indent=0,
        print_content=False,
        diff_status=False,
        ignore_list: Optional[List[str]] = None,
    ):
        """
        Print this item and its children as an indented tree.

        Args:
            indent: Current indentation level (0 for the root).
            print_content: Unused here; forwarded to recursive calls.
            diff_status: When True, show each item's status and skip
                subtrees that have no pending task.
            ignore_list: Paths ignored when deciding whether an item needs
                generation; defaults to an empty list.

        Returns:
            None
        """

        def print_indent(indent=0):
            # Two spaces per level plus a branch marker; the root gets none.
            if indent == 0:
                return ""
            return "  " * indent + "|-"

        if ignore_list is None:
            ignore_list = []
        print_obj_name = self.obj_name
        setting = SettingsManager.get_setting()
        if self.item_type == DocItemType._repo:
            # The repository root is displayed by its configured path.
            print_obj_name = setting.project.target_repo
        if diff_status and need_to_generate(self, ignore_list=ignore_list):
            print(
                print_indent(indent)
                + f"{self.item_type.print_self()}: {print_obj_name} : {self.item_status.name}"
            )
        else:
            print(
                print_indent(indent)
                + f"{self.item_type.print_self()}: {print_obj_name}"
            )
        for child in self.children.values():
            if diff_status and not child.has_task:
                continue
            child.print_recursive(
                indent=indent + 1,
                print_content=print_content,
                diff_status=diff_status,
                ignore_list=ignore_list,
            )

check_depth()

Determines the depth of the subtree rooted at this node.

The depth represents the longest path from this node to a leaf node, measured in edges.

The depth is defined as the number of edges on the longest path from this node to a leaf.

Returns:

Name Type Description
int

The depth of the tree.

Source code in repo_agent/doc_meta_info.py
def check_depth(self):
    """
    Compute the depth of the subtree rooted at this node.

    The depth is the number of edges on the longest path from this node
    down to a leaf; a leaf therefore has depth 0. The result is cached on
    ``self.depth`` as a side effect.

    Returns:
        int: The depth of the subtree rooted at this node.
    """

    child_depths = [child.check_depth() for child in self.children.values()]
    self.depth = 1 + max(child_depths) if child_depths else 0
    return self.depth

check_has_task(now_item, ignore_list=[]) staticmethod

Recursively checks if the current DocItem or any of its descendants require processing, updating the has_task attribute accordingly.

Parameters:

Name Type Description Default
now_item DocItem

The DocItem to check.

required
ignore_list List[str]

A list of strings representing items to ignore during the check.

[]

Returns:

Type Description

None

Source code in repo_agent/doc_meta_info.py
@staticmethod
def check_has_task(now_item: DocItem, ignore_list: Optional[List[str]] = None):
    """
    Recursively determine whether ``now_item`` or any descendant needs
    documentation work, updating each node's ``has_task`` flag in place.

    Args:
        now_item: The DocItem subtree root to check.
        ignore_list: Paths to skip when deciding whether generation is
            needed. Defaults to an empty list (the original used a mutable
            ``[]`` default, a Python anti-pattern).

    Returns:
        None
    """

    if ignore_list is None:
        ignore_list = []
    if need_to_generate(now_item, ignore_list=ignore_list):
        now_item.has_task = True
    for child in now_item.children.values():
        DocItem.check_has_task(child, ignore_list)
        now_item.has_task = now_item.has_task or child.has_task

find(recursive_file_path)

Locates a DocItem by traversing the repository structure using a list representing the path to the item. Returns the found DocItem or None if the path is invalid.

Parameters:

Name Type Description Default
recursive_file_path list

A list representing the path to the desired DocItem within the repository structure. Each element in the list is a key representing a directory or item name.

required

Returns:

Name Type Description
DocItem Optional[DocItem]

The DocItem found at the specified recursive file path, or None if the path does not exist.

Source code in repo_agent/doc_meta_info.py
def find(self, recursive_file_path: list) -> Optional[DocItem]:
    """
    Walk down the repository tree following the given path components.

    May only be called on the repository root item.

    Args:
        recursive_file_path: Ordered list of child keys (directory or item
            names) leading from the repository root to the target item.

    Returns:
        Optional[DocItem]: The DocItem at the end of the path, or None if
        any component is missing.
    """

    assert self.item_type == DocItemType._repo
    now = self
    for part in recursive_file_path:
        if part not in now.children:
            return None
        now = now.children[part]
    return now

get_file_name()

Returns the base name of the documented item, including the .py extension.

Parameters:

None — this method takes no arguments. (An earlier version of this page
documented a nonexistent ``stri`` parameter.)

Returns:

Name Type Description
str

The file name of the module (without path) including the .py extension.

Source code in repo_agent/doc_meta_info.py
def get_file_name(self):
    """
    Returns the base name of the documented item, including the .py extension.

    Args:
        stri: Not used. Included for compatibility with parent class method signature.

    Returns:
        str: The file name of the module (without path) including the .py extension.

    """

    full_name = self.get_full_name()
    return full_name.split(".py")[0] + ".py"

get_full_name(strict=False)

Constructs the complete, hierarchical name of the object by ascending through its parent relationships. Ensures uniqueness within each level of the hierarchy when requested, appending a version indicator to duplicated names.

Parameters:

Name Type Description Default
strict

If True, ensures uniqueness of names in the path by appending '(name_duplicate_version)' if a duplicate is found.

False

Returns:

Name Type Description
str

The full name of the object as a string, with components joined by '/'.

Source code in repo_agent/doc_meta_info.py
def get_full_name(self, strict=False):
    """
    Build the hierarchical name of this object by walking up its parents.

    Args:
        strict: If True, use the key under which each node is registered in
            its parent's ``children`` mapping; when that key differs from
            the node's own name, append '(name_duplicate_version)'.

    Returns:
        str: Path components joined by '/', excluding the root.
    """

    if self.father is None:
        return self.obj_name
    name_list = []
    now = self
    while now is not None:
        self_name = now.obj_name
        # Bug fix: look the node up in the *current* node's parent, not
        # always in ``self.father`` as the original code did — the old
        # lookup was wrong for any ancestor more than one level up.
        if strict and now.father is not None:
            for name, item in now.father.children.items():
                if item is now:
                    self_name = name
                    break
            if self_name != now.obj_name:
                self_name = self_name + "(name_duplicate_version)"
        name_list = [self_name] + name_list
        now = now.father
    return "/".join(name_list[1:])

get_travel_list()

Collects this node and all its descendants into a single list, traversing the tree structure.

Returns:

Name Type Description
list

A list containing the current node and all its descendants.

Source code in repo_agent/doc_meta_info.py
def get_travel_list(self):
    """
    Flatten the subtree rooted at this node into a list (pre-order).

    Returns:
        list: This node followed by all of its descendants.
    """

    result = [self]
    for child in self.children.values():
        result.extend(child.get_travel_list())
    return result

has_ans_relation(now_a, now_b) staticmethod

Determines if a direct hierarchical relationship exists between two DocItems.

Parameters:

Name Type Description Default
now_a DocItem

The first DocItem.

required
now_b DocItem

The second DocItem.

required

Returns:

Type Description

The DocItem that is the ancestor or successor, or None if no such relation exists.

Source code in repo_agent/doc_meta_info.py
@staticmethod
def has_ans_relation(now_a: DocItem, now_b: DocItem):
    """
    Check whether one DocItem is an ancestor of the other.

    Args:
        now_a: The first DocItem.
        now_b: The second DocItem.

    Returns:
        The item that lies on the other's root-to-node path (i.e. the
        ancestor), or None when neither is an ancestor of the other.
    """

    for candidate, other in ((now_b, now_a), (now_a, now_b)):
        if candidate in other.tree_path:
            return candidate
    return None

parse_tree_path(now_path)

Recursively constructs the path from the root to this node within a documentation tree.

Parameters:

Name Type Description Default
now_path

The current path being built.

required

Returns:

Type Description

None

Source code in repo_agent/doc_meta_info.py
def parse_tree_path(self, now_path):
    """
    Record the root-to-node path on this node and all its descendants.

    Args:
        now_path: The path (list of ancestor nodes) accumulated so far.

    Returns:
        None
    """

    self.tree_path = now_path + [self]
    for child in self.children.values():
        child.parse_tree_path(self.tree_path)

print_recursive(indent=0, print_content=False, diff_status=False, ignore_list=[])

Prints the object and its children in a tree-like structure, indicating status changes where applicable. The output is indented to represent the hierarchy of objects.

Parameters:

Name Type Description Default
indent

The level of indentation for printing.

0
print_content

Whether to print the content of the object. Not used in this method, but passed down recursively.

False
diff_status

Whether to include diff status in the output.

False
ignore_list List[str]

A list of strings to ignore when determining if a diff is needed.

[]

Returns:

Type Description

None

Source code in repo_agent/doc_meta_info.py
def print_recursive(
    self,
    indent=0,
    print_content=False,
    diff_status=False,
    ignore_list: Optional[List[str]] = None,
):
    """
    Print this item and its children as an indented tree.

    Args:
        indent: Current indentation level (0 for the root).
        print_content: Unused here; forwarded unchanged to recursive calls.
        diff_status: When True, append each item's status and skip subtrees
            that have no pending task.
        ignore_list: Paths ignored when deciding whether an item needs
            generation. Defaults to an empty list (the original used a
            mutable ``[]`` default, a Python anti-pattern).

    Returns:
        None
    """

    def print_indent(indent=0):
        # Two spaces per level plus a branch marker; the root gets none.
        if indent == 0:
            return ""
        return "  " * indent + "|-"

    if ignore_list is None:
        ignore_list = []
    print_obj_name = self.obj_name
    setting = SettingsManager.get_setting()
    if self.item_type == DocItemType._repo:
        # The repository root is displayed by its configured path.
        print_obj_name = setting.project.target_repo
    if diff_status and need_to_generate(self, ignore_list=ignore_list):
        print(
            print_indent(indent)
            + f"{self.item_type.print_self()}: {print_obj_name} : {self.item_status.name}"
        )
    else:
        print(
            print_indent(indent)
            + f"{self.item_type.print_self()}: {print_obj_name}"
        )
    for child in self.children.values():
        if diff_status and not child.has_task:
            continue
        child.print_recursive(
            indent=indent + 1,
            print_content=print_content,
            diff_status=diff_status,
            ignore_list=ignore_list,
        )

DocItemStatus

Bases: Enum

Represents the status of documentation items in relation to their code.

This class is used to track whether documentation for a given item (e.g., a function, class, or module) is up-to-date, has been generated, and if the underlying code has changed since the last documentation generation. It also manages flags related to referencers.

Source code in repo_agent/doc_meta_info.py
@unique
class DocItemStatus(Enum):
    """
    Enumerates the documentation state of an item relative to its code.

    Members cover the full lifecycle: documentation is current, has never
    been generated, the underlying code changed since generation, a new
    referencer appeared, or a previously recorded referencer disappeared.
    """

    doc_up_to_date = auto()
    doc_has_not_been_generated = auto()
    code_changed = auto()
    add_new_referencer = auto()
    referencer_not_exist = auto()
DocItemType

Bases: Enum

Represents the type of a documentation item found in a repository.

This class provides an enum-like structure to categorize different elements within code documentation, such as classes, functions, directories, and files. It also includes methods for converting the item type to a string and printing it with color coding.

Class Attributes: - _repo - _dir - _file - _class - _class_function - _function - _sub_function - _global_var

Class Methods: - to_str:

Source code in repo_agent/doc_meta_info.py
@unique
class DocItemType(Enum):
    """
    Categorizes elements found in a repository for documentation purposes.

    Members span the hierarchy from the repository root down to
    sub-functions and global variables. Helper methods render a member as
    an AST-style label (``to_str``) or as a color-coded name for terminal
    output (``print_self``).
    """

    _repo = auto()
    _dir = auto()
    _file = auto()
    _class = auto()
    _class_function = auto()
    _function = auto()
    _sub_function = auto()
    _global_var = auto()

    def to_str(self):
        """
        Return an AST-style label for this item type.

        Classes map to 'ClassDef'; all function variants map to
        'FunctionDef'; directories map to 'Dir'. Any other member falls
        back to its enum name.

        Returns:
            str: The label for this item type.
        """

        if self == DocItemType._class:
            return "ClassDef"
        if self in (
            DocItemType._function,
            DocItemType._class_function,
            DocItemType._sub_function,
        ):
            return "FunctionDef"
        if self == DocItemType._dir:
            return "Dir"
        return self.name

    def print_self(self):
        """
        Return this member's name wrapped in a type-specific terminal color.

        Directories are green, files yellow, classes red, and function
        variants blue; anything else is white. ``Style.RESET_ALL`` is
        appended so subsequent terminal output is unaffected.

        Returns:
            str: The colorized enum name.
        """

        color = Fore.WHITE
        if self == DocItemType._dir:
            color = Fore.GREEN
        elif self == DocItemType._file:
            color = Fore.YELLOW
        elif self == DocItemType._class:
            color = Fore.RED
        elif self in [
            DocItemType._function,
            DocItemType._sub_function,
            DocItemType._class_function,
        ]:
            color = Fore.BLUE
        return color + self.name + Style.RESET_ALL

    def get_edge_type(self, from_item_type: DocItemType, to_item_type: DocItemType):
        """
        Determine the edge type connecting two documentation item types.

        Not implemented: the body is a placeholder and always returns None.

        Args:
            from_item_type: The source DocItemType.
            to_item_type: The destination DocItemType.

        Returns:
            None: Placeholder; intended to return an edge-type string.
        """

        pass

get_edge_type(from_item_type, to_item_type)

Determines the relationship between two documentation items.

Parameters:

Name Type Description Default
from_item_type DocItemType

The source documentation item type.

required
to_item_type DocItemType

The destination documentation item type.

required

Returns:

Type Description

A string representing the connection between the two items.

Args: type: The source DocItemType. to_item_type: The destination DocItemType.

Returns: str: The edge type string representation.

Source code in repo_agent/doc_meta_info.py
def get_edge_type(self, from_item_type: DocItemType, to_item_type: DocItemType):
    """
    Determine the relationship (edge type) between two documentation item types.

    Not implemented: the body is a placeholder and currently returns None.

    Args:
        from_item_type: The source DocItemType.
        to_item_type: The destination DocItemType.

    Returns:
        None: Placeholder; intended to return an edge-type string.
    """

    pass

print_self()

Formats the DocItem name with color-coding to visually distinguish its type.

Parameters:

Name Type Description Default
self

The DocItem object whose name should be printed.

required

Returns:

Name Type Description
str

A colored string representing the name of the DocItem, with Style.RESET_ALL appended to reset the color.

Source code in repo_agent/doc_meta_info.py
def print_self(self):
    """
    Return this member's name wrapped in a type-specific terminal color.

    Directories are green, files yellow, classes red, and all function
    variants blue; any other type is white. ``Style.RESET_ALL`` is
    appended so subsequent terminal output is unaffected.

    Returns:
        str: The colorized enum name.
    """

    function_kinds = (
        DocItemType._function,
        DocItemType._sub_function,
        DocItemType._class_function,
    )
    if self == DocItemType._dir:
        color = Fore.GREEN
    elif self == DocItemType._file:
        color = Fore.YELLOW
    elif self == DocItemType._class:
        color = Fore.RED
    elif self in function_kinds:
        color = Fore.BLUE
    else:
        color = Fore.WHITE
    return color + self.name + Style.RESET_ALL

to_str()

Returns a string representation of the DocItemType, such as 'ClassDef', 'FunctionDef', or 'Dir'. If the type is not explicitly mapped, it returns the item's name.

Parameters:

Name Type Description Default
self

The DocItemType enum instance.

required

Returns:

Name Type Description
str

A string representing the DocItemType, such as 'ClassDef', 'FunctionDef', or 'Dir'. If the type is not recognized, returns the name of the item.

Source code in repo_agent/doc_meta_info.py
def to_str(self):
    """
    Return an AST-style label for this DocItemType.

    Classes map to 'ClassDef', all function variants to 'FunctionDef',
    and directories to 'Dir'; any other member falls back to its name.

    Returns:
        str: The label for this item type.
    """

    labels = {
        DocItemType._class: "ClassDef",
        DocItemType._function: "FunctionDef",
        DocItemType._class_function: "FunctionDef",
        DocItemType._sub_function: "FunctionDef",
        DocItemType._dir: "Dir",
    }
    return labels.get(self, self.name)

EdgeType

Bases: Enum

Represents the type of edge in a graph, categorizing relationships between nodes.

This class serves as an enumeration for different edge types used to represent relationships such as references, subfile inclusions, and file-item associations.

Class Attributes: - reference_edge - subfile_edge - file_item_edge

Source code in repo_agent/doc_meta_info.py
@unique
class EdgeType(Enum):
    """
    Enumerates the kinds of edges in the documentation relationship graph.

    An edge either records a code reference between items, links a file to
    its containing directory (subfile), or ties an item to the file that
    defines it.
    """

    reference_edge = auto()
    subfile_edge = auto()
    file_item_edge = auto()

MetaInfo dataclass

MetaInfo class for managing and representing project metadata.

This class stores information about a software repository, including its structure, files, references, and task dependencies. It provides methods for loading, saving, and manipulating this metadata to support documentation generation and analysis.

Source code in repo_agent/doc_meta_info.py
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
@dataclass
class MetaInfo:
    """
    MetaInfo class for managing and representing project metadata.

    This class stores information about a software repository, including its structure,
    files, references, and task dependencies. It provides methods for loading, saving,
    and manipulating this metadata to support documentation generation and analysis.
    """

    repo_path: Path = ""  # absolute path of the target repository (assigned after construction)
    document_version: str = ""  # version tag of the commit the docs were generated for
    main_idea: str = ""  # project-level summary restored from meta-info.json
    repo_structure: Dict[str, Any] = field(default_factory=dict)  # raw project_hierarchy JSON
    target_repo_hierarchical_tree: "DocItem" = field(default_factory=lambda: DocItem())  # root of the DocItem tree
    # Fixed: `Any[List]` is not a valid typing expression (Any cannot be
    # subscripted); the intended meaning is "a list or None".
    white_list: Optional[List] = None  # optional [{"file_path": ..., "id_text": ...}, ...] filter
    fake_file_reflection: Dict[str, str] = field(default_factory=dict)  # real path -> fake-file path
    jump_files: List[str] = field(default_factory=list)  # files excluded from reference parsing
    deleted_items_from_older_meta: List[List] = field(default_factory=list)  # [full_name, item_type_name] pairs
    in_generation_process: bool = False  # True while doc generation is underway
    # Fixed: the previous default `threading.Lock()` was evaluated once at class
    # definition time, so every MetaInfo instance shared the same Lock object.
    # default_factory gives each instance its own lock.
    checkpoint_lock: threading.Lock = field(default_factory=threading.Lock)

    @staticmethod
    def init_meta_info(file_path_reflections, jump_files) -> MetaInfo:
        """
        Build a fresh MetaInfo by scanning the target repository on disk.

        Args:
            file_path_reflections: Mapping of real file paths to their
                fake-file counterparts.
            jump_files: File paths that must be skipped during parsing.

        Returns:
            MetaInfo: Metadata describing the freshly parsed repository.
        """
        settings = SettingsManager.get_setting()
        repo_root = settings.project.target_repo
        print(
            f"{Fore.LIGHTRED_EX}Initializing MetaInfo: {Style.RESET_ALL}from {repo_root}"
        )
        handler = FileHandler(repo_root, None)
        overall_structure = handler.generate_overall_structure(
            file_path_reflections, jump_files
        )
        meta = MetaInfo.from_project_hierarchy_json(overall_structure)
        meta.repo_path = repo_root
        meta.fake_file_reflection = file_path_reflections
        meta.jump_files = jump_files
        return meta

    @staticmethod
    def from_checkpoint_path(
        checkpoint_dir_path: Path, repo_structure: Optional[Dict[str, Any]] = None
    ) -> MetaInfo:
        """
        Load project metadata from a checkpoint directory to restore a previous state.

        Rebuilds the DocItem tree from ``project_hierarchy.json``, then overlays
        the bookkeeping fields stored in ``meta-info.json``.

        Args:
            checkpoint_dir_path: The path to the checkpoint directory.
            repo_structure: An optional dictionary representing the repository structure.

        Returns:
            MetaInfo: A MetaInfo object loaded from the checkpoint data.

        Raises:
            FileNotFoundError: If either checkpoint JSON file is missing.
            KeyError: If ``meta-info.json`` lacks one of the expected keys.
        """

        setting = SettingsManager.get_setting()
        project_hierarchy_json_path = checkpoint_dir_path / "project_hierarchy.json"
        with open(project_hierarchy_json_path, "r", encoding="utf-8") as reader:
            project_hierarchy_json = json.load(reader)
        metainfo = MetaInfo.from_project_hierarchy_json(
            project_hierarchy_json, repo_structure
        )
        # Overlay the saved bookkeeping fields onto the freshly built tree.
        with open(
            checkpoint_dir_path / "meta-info.json", "r", encoding="utf-8"
        ) as reader:
            meta_data = json.load(reader)
            # repo_path comes from live settings, not from the checkpoint file.
            metainfo.repo_path = setting.project.target_repo
            metainfo.main_idea = meta_data["main_idea"]
            metainfo.document_version = meta_data["doc_version"]
            metainfo.fake_file_reflection = meta_data["fake_file_reflection"]
            metainfo.jump_files = meta_data["jump_files"]
            metainfo.in_generation_process = meta_data["in_generation_process"]
            metainfo.deleted_items_from_older_meta = meta_data[
                "deleted_items_from_older_meta"
            ]
        print(f"{Fore.CYAN}Loading MetaInfo:{Style.RESET_ALL} {checkpoint_dir_path}")
        return metainfo

    def checkpoint(self, target_dir_path: str | Path, flash_reference_relation=False):
        """
        Persist the hierarchy and metadata JSON files to ``target_dir_path``.

        Writes ``project_hierarchy.json`` (the DocItem tree) and
        ``meta-info.json`` (bookkeeping fields) under ``checkpoint_lock`` so
        concurrent checkpoints cannot interleave their writes.

        Args:
            target_dir_path: Directory in which the checkpoint files are saved;
                created (with parents) if it does not exist.
            flash_reference_relation: When True, the hierarchy JSON records the
                latest bidirectional reference relations by full name.

        Returns:
            None
        """

        with self.checkpoint_lock:
            target_dir = Path(target_dir_path)
            logger.debug(f"Checkpointing MetaInfo to directory: {target_dir}")
            print(f"{Fore.GREEN}MetaInfo is Refreshed and Saved{Style.RESET_ALL}")
            if not target_dir.exists():
                target_dir.mkdir(parents=True, exist_ok=True)
                logger.debug(f"Created directory: {target_dir}")
            now_hierarchy_json = self.to_hierarchy_json(
                flash_reference_relation=flash_reference_relation
            )
            hierarchy_file = target_dir / "project_hierarchy.json"
            try:
                with hierarchy_file.open("w", encoding="utf-8") as writer:
                    json.dump(now_hierarchy_json, writer, indent=2, ensure_ascii=False)
                logger.debug(f"Saved hierarchy JSON to {hierarchy_file}")
            except IOError as e:
                logger.error(f"Failed to save hierarchy JSON to {hierarchy_file}: {e}")
            meta_info_file = target_dir / "meta-info.json"
            meta = {
                # Consistency fix: every other call site in this class uses the
                # static call form `SettingsManager.get_setting()`; the previous
                # `SettingsManager().get_setting()` built a throwaway instance.
                # NOTE(review): main_idea is saved from live settings while
                # from_checkpoint_path restores it into self.main_idea —
                # confirm settings is the intended source of truth.
                "main_idea": SettingsManager.get_setting().project.main_idea,
                "doc_version": self.document_version,
                "in_generation_process": self.in_generation_process,
                "fake_file_reflection": self.fake_file_reflection,
                "jump_files": self.jump_files,
                "deleted_items_from_older_meta": self.deleted_items_from_older_meta,
            }
            try:
                with meta_info_file.open("w", encoding="utf-8") as writer:
                    json.dump(meta, writer, indent=2, ensure_ascii=False)
                logger.debug(f"Saved meta-info JSON to {meta_info_file}")
            except IOError as e:
                logger.error(f"Failed to save meta-info JSON to {meta_info_file}: {e}")

    def print_task_list(self, task_dict: Dict[Any, Task]):
        """
        Print a PrettyTable summary of the pending documentation tasks.

        Each row shows the task id, the item status that caused the task, the
        item's strict full path, and the ids of the tasks it depends on
        (truncated to ``head...tail`` when the joined id string exceeds 20
        characters).

        Args:
            task_dict: Mapping of task id to Task; each Task carries its
                DocItem in ``extra_info``.

        Returns:
            None
        """

        task_table = PrettyTable(
            ["task_id", "Doc Generation Reason", "Path", "dependency"]
        )
        for task_id, task_info in task_dict.items():
            remain_str = "None"
            if task_info.dependencies != []:
                remain_str = ",".join(
                    [str(d_task.task_id) for d_task in task_info.dependencies]
                )
                # Keep the table readable: show only the head and tail of a
                # long dependency-id list.
                if len(remain_str) > 20:
                    remain_str = remain_str[:8] + "..." + remain_str[-8:]
            task_table.add_row(
                [
                    task_id,
                    task_info.extra_info.item_status.name,
                    task_info.extra_info.get_full_name(strict=True),
                    remain_str,
                ]
            )
        print(task_table)

    def get_all_files(self, count_repo=False) -> List[DocItem]:
        """
        Collect every file and directory node in the hierarchy tree.

        Performs a preorder walk of ``target_repo_hierarchical_tree`` and
        gathers nodes whose type is file or directory; the repo root itself is
        included only when requested.

        Args:
            count_repo: Whether to include the root repo node in the result.

        Returns:
            List[DocItem]: The matching nodes, in preorder.
        """

        collected: List[DocItem] = []

        def visit(node) -> None:
            is_wanted = node.item_type in (DocItemType._file, DocItemType._dir) or (
                count_repo and node.item_type == DocItemType._repo
            )
            if is_wanted:
                collected.append(node)
            for child in node.children.values():
                visit(child)

        visit(self.target_repo_hierarchical_tree)
        return collected

    def find_obj_with_lineno(self, file_node: DocItem, start_line_num) -> DocItem:
        """
        Descend from a file node to the innermost child covering a line.

        Starting at ``file_node``, repeatedly steps into the first child whose
        ``code_start_line``/``code_end_line`` span contains ``start_line_num``;
        stops when no child qualifies or a leaf is reached.

        Args:
            file_node: Node to start the search from (must not be None).
            start_line_num: Line number to locate.

        Returns:
            DocItem: The deepest node whose code span contains the line.
        """
        current = file_node
        assert current != None
        while len(current.children) > 0:
            descended = False
            for candidate in current.children.values():
                assert candidate.content != None
                span_ok = (
                    candidate.content["code_start_line"] <= start_line_num
                    and candidate.content["code_end_line"] >= start_line_num
                )
                if span_ok:
                    current = candidate
                    descended = True
                    break
            if not descended:
                return current
        return current

    def parse_reference(self):
        """
        Build bidirectional reference links between all DocItems in the repo.

        For every file node (jump-files never appear here), each object is
        looked up with ``find_all_referencer``; every referencer found is
        linked through ``reference_who`` / ``who_reference_me`` unless it
        comes from an unstaged (fake-file) or untracked (jump-file) source,
        shares the object's own name, or sits in a direct ancestor/descendant
        relation with the object. When a white list is configured, only the
        listed files are scanned, and only the listed objects get references
        resolved beyond their own file.
        """

        file_nodes = self.get_all_files()
        white_list_file_names, white_list_obj_names = ([], [])
        if self.white_list != None:
            white_list_file_names = [cont["file_path"] for cont in self.white_list]
            white_list_obj_names = [cont["id_text"] for cont in self.white_list]
        for file_node in tqdm(file_nodes, desc="parsing bidirectional reference"):
            "检测一个文件内的所有引用信息,只能检测引用该文件内某个obj的其他内容。\n            1. 如果某个文件是jump-files,就不应该出现在这个循环里\n            2. 如果检测到的引用信息来源于一个jump-files, 忽略它\n            3. 如果检测到一个引用来源于fake-file,则认为他的母文件是原来的文件\n"
            assert not file_node.get_full_name().endswith(latest_verison_substring)
            ref_count = 0
            rel_file_path = file_node.get_full_name()
            assert rel_file_path not in self.jump_files
            # Skip files outside the white list entirely.
            if (
                white_list_file_names != []
                and file_node.get_file_name() not in white_list_file_names
            ):
                continue

            def walk_file(now_obj: DocItem):
                """Visit one object of the file and link everything that references it."""
                nonlocal ref_count, white_list_file_names
                # Objects outside the white list still get references resolved,
                # but only within their own file.
                in_file_only = False
                if (
                    white_list_obj_names != []
                    and now_obj.obj_name not in white_list_obj_names
                ):
                    in_file_only = True
                if SettingsManager().get_setting().project.parse_references:
                    reference_list = find_all_referencer(
                        repo_path=self.repo_path,
                        variable_name=now_obj.obj_name,
                        file_path=rel_file_path,
                        line_number=now_obj.content["code_start_line"],
                        column_number=now_obj.content["name_column"],
                        in_file_only=in_file_only,
                    )
                else:
                    reference_list = []
                for referencer_pos in reference_list:
                    referencer_file_ral_path = referencer_pos[0]
                    # Referencer lives in an unstaged (fake) file: skip it.
                    if referencer_file_ral_path in self.fake_file_reflection.values():
                        "检测到的引用者来自于unstaged files,跳过该引用"
                        print(
                            f"{Fore.LIGHTBLUE_EX}[Reference From Unstaged Version, skip]{Style.RESET_ALL} {referencer_file_ral_path} -> {now_obj.get_full_name()}"
                        )
                        continue
                    # Referencer lives in an untracked (jump) file: skip it.
                    elif referencer_file_ral_path in self.jump_files:
                        "检测到的引用者来自于untracked files,跳过该引用"
                        print(
                            f"{Fore.LIGHTBLUE_EX}[Reference From Unstracked Version, skip]{Style.RESET_ALL} {referencer_file_ral_path} -> {now_obj.get_full_name()}"
                        )
                        continue
                    target_file_hiera = referencer_file_ral_path.split("/")
                    referencer_file_item = self.target_repo_hierarchical_tree.find(
                        target_file_hiera
                    )
                    if referencer_file_item == None:
                        print(
                            f'{Fore.LIGHTRED_EX}Error: Find "{referencer_file_ral_path}"(not in target repo){Style.RESET_ALL} referenced {now_obj.get_full_name()}'
                        )
                        continue
                    referencer_node = self.find_obj_with_lineno(
                        referencer_file_item, referencer_pos[1]
                    )
                    # A referencer carrying the same name as the target is
                    # treated as a duplicate-name artifact of jedi and skipped.
                    if referencer_node.obj_name == now_obj.obj_name:
                        logger.info(
                            f"Jedi find {now_obj.get_full_name()} with name_duplicate_reference, skipped"
                        )
                        continue
                    # Only link the pair when they are not in a direct
                    # ancestor/descendant relation with each other.
                    if DocItem.has_ans_relation(now_obj, referencer_node) == None:
                        if now_obj not in referencer_node.reference_who:
                            # A reference found on the very first line of a
                            # function-kind referencer counts as "special".
                            special_reference_type = (
                                referencer_node.item_type
                                in [
                                    DocItemType._function,
                                    DocItemType._sub_function,
                                    DocItemType._class_function,
                                ]
                                and referencer_node.code_start_line == referencer_pos[1]
                            )
                            referencer_node.special_reference_type.append(
                                special_reference_type
                            )
                            referencer_node.reference_who.append(now_obj)
                            now_obj.who_reference_me.append(referencer_node)
                            ref_count += 1
                for _, child in now_obj.children.items():
                    walk_file(child)

            for _, child in file_node.children.items():
                walk_file(child)

    def get_task_manager(self, now_node: DocItem, task_available_func) -> TaskManager:
        """
        Create a TaskManager whose tasks cover the DocItems under ``now_node``.

        Task dependencies are derived from each item's children and from the
        items it references. Scheduling is greedy: an item with no unmet
        dependencies goes first; when circular references make that
        impossible, the item with the fewest unmet non-special references is
        picked as the least-bad break point.

        Args:
            now_node: Root of the subtree whose items become tasks.
            task_available_func: Predicate deciding whether a DocItem should
                become a task. Can be None, in which case every item qualifies.

        Returns:
            TaskManager: A TaskManager containing the generated tasks, with
                dependency ids reflecting child and reference relations.
        """

        doc_items = now_node.get_travel_list()
        # Restrict to white-listed (file_path, id_text) pairs if configured.
        if self.white_list != None:

            def in_white_list(item: DocItem):
                for cont in self.white_list:
                    if (
                        item.get_file_name() == cont["file_path"]
                        and item.obj_name == cont["id_text"]
                    ):
                        return True
                return False

            doc_items = list(filter(in_white_list, doc_items))
        doc_items = list(filter(task_available_func, doc_items))
        # Shallower items first so parents tend to precede their subtrees.
        doc_items = sorted(doc_items, key=lambda x: x.depth)
        deal_items = []
        task_manager = TaskManager()
        bar = tqdm(total=len(doc_items), desc="parsing topology task-list")
        while doc_items:
            min_break_level = 10000000.0
            target_item = None
            for item in doc_items:
                "一个任务依赖于所有引用者和他的子节点,我们不能保证引用不成环(也许有些仓库的废代码会出现成环)。\n                这时就只能选择一个相对来说遵守程度最好的了\n                有特殊情况func-def中的param def可能会出现循环引用\n                另外循环引用真实存在,对于一些bind类的接口真的会发生,比如:\n                ChatDev/WareHouse/Gomoku_HumanAgentInteraction_20230920135038/main.py里面的: on-click、show-winner、restart\n"
                # best_break_level counts every unmet dependency (children plus
                # references); second_best_break_level ignores "special"
                # references so cycles through them can be broken.
                best_break_level = 0
                second_best_break_level = 0
                for _, child in item.children.items():
                    if task_available_func(child) and child not in deal_items:
                        best_break_level += 1
                for referenced, special in zip(
                    item.reference_who, item.special_reference_type
                ):
                    if task_available_func(referenced) and referenced not in deal_items:
                        best_break_level += 1
                    if (
                        task_available_func(referenced)
                        and (not special)
                        and (referenced not in deal_items)
                    ):
                        second_best_break_level += 1
                # All dependencies satisfied: schedule this item immediately.
                if best_break_level == 0:
                    min_break_level = -1
                    target_item = item
                    break
                if second_best_break_level < min_break_level:
                    target_item = item
                    min_break_level = second_best_break_level
            # min_break_level > 0 means even the second-best criterion could
            # not find a dependency-free item: a genuine reference cycle.
            if min_break_level > 0:
                print(
                    f"circle-reference(second-best still failed), level={min_break_level}: {target_item.get_full_name()}"
                )
            item_denp_task_ids = []
            for _, child in target_item.children.items():
                if child.multithread_task_id != -1:
                    item_denp_task_ids.append(child.multithread_task_id)
            for referenced_item in target_item.reference_who:
                if referenced_item.multithread_task_id in task_manager.task_dict.keys():
                    item_denp_task_ids.append(referenced_item.multithread_task_id)
            item_denp_task_ids = list(set(item_denp_task_ids))
            if task_available_func == None or task_available_func(target_item):
                task_id = task_manager.add_task(
                    dependency_task_id=item_denp_task_ids, extra=target_item
                )
                target_item.multithread_task_id = task_id
            deal_items.append(target_item)
            doc_items.remove(target_item)
            bar.update(1)
        return task_manager

    def get_topology(self, task_available_func) -> TaskManager:
        """
        Resolve references, then build the topologically ordered task list.

        Runs ``parse_reference`` to establish the bidirectional reference
        graph and delegates to ``get_task_manager`` over the whole hierarchy
        tree.

        Args:
            task_available_func: Predicate deciding whether a DocItem becomes
                a task; forwarded to ``get_task_manager``.

        Returns:
            TaskManager: Tasks ordered so children and referenced items come
                before the items that depend on them.
        """

        self.parse_reference()
        task_manager = self.get_task_manager(
            self.target_repo_hierarchical_tree, task_available_func=task_available_func
        )
        return task_manager

    def _map(self, deal_func: Callable):
        """
        Recursively locates the root DocItem in the repository hierarchy.

        Args:
            now_item: The current item being processed.
            root_item: The initial root item.

        Returns:
            The root item if found, otherwise None.

        """

        def travel(now_item: DocItem):
            deal_func(now_item)
            for _, child in now_item.children.items():
                travel(child)

        travel(self.target_repo_hierarchical_tree)

    def load_doc_from_older_meta(self, older_meta: MetaInfo):
        """
        Merge previously generated documentation into this MetaInfo.

        For every item of the older tree that still exists in the new tree,
        copies over its markdown content and status; items whose source code
        changed (ignoring docstrings) are marked ``code_changed``, and items
        that vanished are recorded in ``deleted_items_from_older_meta``.
        Afterwards re-parses references and flags items whose referencer set
        grew (``add_new_referencer``) or shrank (``referencer_not_exist``).

        Args:
            older_meta: The MetaInfo of the previous documentation run.

        Returns:
            None
        """
        logger.info("merge doc from an older version of metainfo")
        root_item = self.target_repo_hierarchical_tree
        deleted_items = []

        def find_item(now_item: DocItem) -> Optional[DocItem]:
            """Locate the node of the new tree matching an old-tree item, or None."""
            nonlocal root_item
            if now_item.father == None:
                return root_item
            father_find_result = find_item(now_item.father)
            if not father_find_result:
                return None
            # The child key in the father's dict may differ from obj_name
            # (duplicate names get suffixed), so recover the real key first.
            real_name = None
            for child_real_name, temp_item in now_item.father.children.items():
                if temp_item == now_item:
                    real_name = child_real_name
                    break
            assert real_name != None
            if real_name in father_find_result.children.keys():
                result_item = father_find_result.children[real_name]
                return result_item
            return None

        def travel(now_older_item: DocItem):
            """Copy doc content/status from an old item into its new counterpart."""
            result_item = find_item(now_older_item)
            if not result_item:
                deleted_items.append(
                    [now_older_item.get_full_name(), now_older_item.item_type.name]
                )
                return
            result_item.md_content = now_older_item.md_content
            result_item.item_status = now_older_item.item_status
            # Compare code with docstrings stripped: doc-only edits must not
            # trigger regeneration.
            if "code_content" in now_older_item.content.keys():
                assert "code_content" in result_item.content.keys()
                if remove_docstrings(
                    now_older_item.content["code_content"]
                ) != remove_docstrings(result_item.content["code_content"]):
                    result_item.item_status = DocItemStatus.code_changed
            for _, child in now_older_item.children.items():
                travel(child)

        travel(older_meta.target_repo_hierarchical_tree)
        "接下来,parse现在的双向引用,观察谁的引用者改了"
        self.parse_reference()

        def travel2(now_older_item: DocItem):
            """Flag items whose set of referencers changed since the older run."""
            result_item = find_item(now_older_item)
            if not result_item:
                return
            "result_item引用的人是否变化了"
            new_reference_names = [
                name.get_full_name(strict=True) for name in result_item.who_reference_me
            ]
            old_reference_names = now_older_item.who_reference_me_name_list
            # Only downgrade items that were considered up to date.
            if (
                not set(new_reference_names) == set(old_reference_names)
                and result_item.item_status == DocItemStatus.doc_up_to_date
            ):
                if set(new_reference_names) <= set(old_reference_names):
                    result_item.item_status = DocItemStatus.referencer_not_exist
                else:
                    result_item.item_status = DocItemStatus.add_new_referencer
            for _, child in now_older_item.children.items():
                travel2(child)

        travel2(older_meta.target_repo_hierarchical_tree)
        self.deleted_items_from_older_meta = deleted_items

    @staticmethod
    def from_project_hierarchy_path(repo_path: str) -> MetaInfo:
        """
        Build a MetaInfo from the ``project_hierarchy.json`` inside ``repo_path``.

        Args:
            repo_path: Directory expected to contain ``project_hierarchy.json``.

        Returns:
            MetaInfo: Metadata parsed from the hierarchy JSON.

        Raises:
            NotImplementedError: If ``project_hierarchy.json`` does not exist.
        """
        hierarchy_path = os.path.join(repo_path, "project_hierarchy.json")
        logger.info(f"parsing from {hierarchy_path}")
        if not os.path.exists(hierarchy_path):
            raise NotImplementedError("Invalid operation detected")
        with open(hierarchy_path, "r", encoding="utf-8") as fh:
            hierarchy_json = json.load(fh)
        return MetaInfo.from_project_hierarchy_json(hierarchy_json)

    def to_hierarchy_json(self, flash_reference_relation=False):
        """
        Serialize the DocItem tree into the project-hierarchy JSON format.

        Builds one entry per file/directory keyed by its full path; each entry
        is the flat preorder list of that file's objects with name, type,
        markdown content, status, and reference relations.

        Args:
            flash_reference_relation: When True, reference relations are
                re-derived from the live DocItem links (strict full names);
                otherwise the previously stored name lists are reused.

        Returns:
            dict: Mapping of full file/dir path to its serialized item list.
        """

        hierachy_json = {}
        file_item_list = self.get_all_files()
        for file_item in file_item_list:
            file_hierarchy_content = []

            def walk_file(now_obj: DocItem):
                nonlocal file_hierarchy_content, flash_reference_relation
                # NOTE: temp_json_obj aliases now_obj.content — the pops and
                # key assignments below mutate the DocItem's content in place.
                temp_json_obj = now_obj.content
                # AST nodes are not JSON-serializable; drop them before dump.
                if "source_node" in temp_json_obj:
                    temp_json_obj.pop("source_node")
                temp_json_obj["name"] = now_obj.obj_name
                temp_json_obj["type"] = now_obj.item_type.to_str()
                temp_json_obj["md_content"] = now_obj.md_content
                temp_json_obj["item_status"] = now_obj.item_status.name
                if flash_reference_relation:
                    temp_json_obj["who_reference_me"] = [
                        cont.get_full_name(strict=True)
                        for cont in now_obj.who_reference_me
                    ]
                    temp_json_obj["reference_who"] = [
                        cont.get_full_name(strict=True)
                        for cont in now_obj.reference_who
                    ]
                    temp_json_obj["special_reference_type"] = (
                        now_obj.special_reference_type
                    )
                else:
                    temp_json_obj["who_reference_me"] = (
                        now_obj.who_reference_me_name_list
                    )
                    temp_json_obj["reference_who"] = now_obj.reference_who_name_list
                file_hierarchy_content.append(temp_json_obj)
                for _, child in now_obj.children.items():
                    walk_file(child)

            for _, child in file_item.children.items():
                walk_file(child)
            # Directories get a single summary entry; files get the flat list
            # of their contained objects.
            if file_item.item_type == DocItemType._dir:
                temp_json_obj = {}
                temp_json_obj["name"] = file_item.obj_name
                temp_json_obj["type"] = file_item.item_type.to_str()
                temp_json_obj["md_content"] = file_item.md_content
                temp_json_obj["item_status"] = file_item.item_status.name
                hierachy_json[file_item.get_full_name()] = [temp_json_obj]
            else:
                hierachy_json[file_item.get_full_name()] = file_hierarchy_content
        return hierachy_json

    @staticmethod
    def from_project_hierarchy_json(
        project_hierarchy_json, repo_structure: Optional[Dict[str, Any]] = None
    ) -> MetaInfo:
        """
        Constructs a hierarchical representation of the project by parsing file information and establishing relationships between documentation items, including handling potential naming conflicts and determining item types based on content.

        This method processes file content and integrates it into a hierarchical tree,
        handling cases where files are missing, empty, or have duplicate names. It also
        determines item types (e.g., directory, file, class, function) based on content analysis
        and establishes parent-child relationships between items in the tree.

        Args:
            file_name: The name of the file being processed.
            file_content: A list containing information about the file's contents.
            repo_structure:  The existing repository structure (optional).
            target_meta_info: An object to store and update project metadata, including the hierarchical tree.

        Returns:
            target_meta_info: The updated target_meta_info object with the populated or modified hierarchical tree.

        """

        setting = SettingsManager.get_setting()
        target_meta_info = MetaInfo(
            repo_structure=project_hierarchy_json,
            target_repo_hierarchical_tree=DocItem(
                item_type=DocItemType._repo, obj_name="full_repo"
            ),
        )
        for file_name, file_content in tqdm(
            project_hierarchy_json.items(), desc="parsing parent relationship"
        ):
            if not os.path.exists(os.path.join(setting.project.target_repo, file_name)):
                logger.info(f"deleted content: {file_name}")
                continue
            elif (
                os.path.getsize(os.path.join(setting.project.target_repo, file_name))
                == 0
                and file_content
                and (file_content[0]["type"] != "Dir")
            ):
                logger.info(f"blank content: {file_name}")
                continue
            recursive_file_path = file_name.split("/")
            pos = 0
            now_structure = target_meta_info.target_repo_hierarchical_tree
            while pos < len(recursive_file_path) - 1:
                if recursive_file_path[pos] not in now_structure.children.keys():
                    now_structure.children[recursive_file_path[pos]] = DocItem(
                        item_type=DocItemType._dir,
                        md_content="",
                        obj_name=recursive_file_path[pos],
                    )
                    now_structure.children[recursive_file_path[pos]].father = (
                        now_structure
                    )
                now_structure = now_structure.children[recursive_file_path[pos]]
                pos += 1
            if recursive_file_path[-1] not in now_structure.children.keys():
                if file_content and file_content[0].get("type") == "Dir":
                    doctype = DocItemType._dir
                    now_structure.children[recursive_file_path[pos]] = DocItem(
                        item_type=doctype, obj_name=recursive_file_path[-1]
                    )
                    now_structure.children[recursive_file_path[pos]].father = (
                        now_structure
                    )
                else:
                    doctype = DocItemType._file
                    now_structure.children[recursive_file_path[pos]] = DocItem(
                        item_type=doctype, obj_name=recursive_file_path[-1]
                    )
                    now_structure.children[recursive_file_path[pos]].father = (
                        now_structure
                    )
            if repo_structure:
                actual_item = repo_structure[file_name]
            else:
                actual_item = deepcopy(file_content)
            assert type(file_content) == list
            file_item = target_meta_info.target_repo_hierarchical_tree.find(
                recursive_file_path
            )
            "用类线段树的方式:\n            1.先parse所有节点,再找父子关系\n            2.一个节点的父节点,所有包含他的code范围的节点里的,最小的节点\n            复杂度是O(n^2)\n            3.最后来处理节点的type问题\n            "
            obj_item_list: List[DocItem] = []
            for value, actual in zip(file_content, actual_item):
                if value.get("source_node"):
                    source_node = value.get("source_node")
                else:
                    source_node = actual.get("source_node")
                obj_doc_item = DocItem(
                    obj_name=value["name"],
                    content=value,
                    md_content=value["md_content"],
                    code_start_line=value.get("code_start_line"),
                    code_end_line=value.get("code_end_line"),
                    source_node=source_node,
                )
                if "item_status" in value.keys():
                    obj_doc_item.item_status = DocItemStatus[value["item_status"]]
                if "reference_who" in value.keys():
                    obj_doc_item.reference_who_name_list = value["reference_who"]
                if "special_reference_type" in value.keys():
                    obj_doc_item.special_reference_type = value[
                        "special_reference_type"
                    ]
                if "who_reference_me" in value.keys():
                    obj_doc_item.who_reference_me_name_list = value["who_reference_me"]
                obj_item_list.append(obj_doc_item)
            for item in obj_item_list:
                potential_father = None
                for other_item in obj_item_list:

                    def code_contain(item, other_item) -> bool:
                        if (
                            other_item.code_end_line == item.code_end_line
                            and other_item.code_start_line == item.code_start_line
                        ):
                            return False
                        if (
                            other_item.code_end_line < item.code_end_line
                            or other_item.code_start_line > item.code_start_line
                        ):
                            return False
                        return True

                    if code_contain(item, other_item):
                        if (
                            potential_father == None
                            or other_item.code_end_line - other_item.code_start_line
                            < potential_father.code_end_line
                            - potential_father.code_start_line
                        ):
                            potential_father = other_item
                if potential_father == None:
                    potential_father = file_item
                item.father = potential_father
                child_name = item.obj_name
                if child_name in potential_father.children.keys():
                    now_name_id = 0
                    while (
                        child_name + f"_{now_name_id}"
                        in potential_father.children.keys()
                    ):
                        now_name_id += 1
                    child_name = child_name + f"_{now_name_id}"
                    logger.warning(
                        f"Name duplicate in {file_item.get_full_name()}: rename to {item.obj_name}->{child_name}"
                    )
                if potential_father.item_type != DocItemType._dir:
                    potential_father.children[child_name] = item

            def change_items(now_item: DocItem):
                if now_item.item_type == DocItemType._dir:
                    return target_meta_info
                if now_item.item_type != DocItemType._file:
                    if now_item.content["type"] == "ClassDef":
                        now_item.item_type = DocItemType._class
                    elif now_item.content["type"] == "FunctionDef":
                        now_item.item_type = DocItemType._function
                        if now_item.father.item_type == DocItemType._class:
                            now_item.item_type = DocItemType._class_function
                        elif now_item.father.item_type in [
                            DocItemType._function,
                            DocItemType._sub_function,
                        ]:
                            now_item.item_type = DocItemType._sub_function
                for _, child in now_item.children.items():
                    change_items(child)

            change_items(file_item)
        target_meta_info.target_repo_hierarchical_tree.parse_tree_path(now_path=[])
        target_meta_info.target_repo_hierarchical_tree.check_depth()
        return target_meta_info

checkpoint(target_dir_path, flash_reference_relation=False)

Persists the project’s metadata and hierarchy to a specified directory, ensuring data is saved for later use or recovery. Includes options to control the level of detail in the saved hierarchy representation.

Parameters:

Name Type Description Default
target_dir_path str | Path

The path to the directory where the checkpoint should be saved.

required
flash_reference_relation

A boolean indicating whether to include flash reference relations in the hierarchy JSON.

False

Returns:

Type Description

None

Source code in repo_agent/doc_meta_info.py
def checkpoint(self, target_dir_path: str | Path, flash_reference_relation=False):
    """
    Persist the project's metadata and hierarchy to a directory.

    Writes ``project_hierarchy.json`` and ``meta-info.json`` under
    ``target_dir_path``, creating the directory first if needed. The level
    of detail in the hierarchy JSON is controlled by
    ``flash_reference_relation``. Serialization is guarded by
    ``self.checkpoint_lock`` so concurrent checkpoints do not interleave.

    Args:
        target_dir_path: Directory where the checkpoint should be saved.
        flash_reference_relation: Whether to include flash reference
            relations in the hierarchy JSON.

    Returns:
        None
    """
    with self.checkpoint_lock:
        out_dir = Path(target_dir_path)
        logger.debug(f"Checkpointing MetaInfo to directory: {out_dir}")
        print(f"{Fore.GREEN}MetaInfo is Refreshed and Saved{Style.RESET_ALL}")
        if not out_dir.exists():
            out_dir.mkdir(parents=True, exist_ok=True)
            logger.debug(f"Created directory: {out_dir}")
        hierarchy_json = self.to_hierarchy_json(
            flash_reference_relation=flash_reference_relation
        )
        hierarchy_path = out_dir / "project_hierarchy.json"
        try:
            with hierarchy_path.open("w", encoding="utf-8") as fp:
                json.dump(hierarchy_json, fp, indent=2, ensure_ascii=False)
            logger.debug(f"Saved hierarchy JSON to {hierarchy_path}")
        except IOError as err:
            logger.error(f"Failed to save hierarchy JSON to {hierarchy_path}: {err}")
        meta_path = out_dir / "meta-info.json"
        # Snapshot of the process-level state needed to resume generation later.
        meta_payload = {
            "main_idea": SettingsManager().get_setting().project.main_idea,
            "doc_version": self.document_version,
            "in_generation_process": self.in_generation_process,
            "fake_file_reflection": self.fake_file_reflection,
            "jump_files": self.jump_files,
            "deleted_items_from_older_meta": self.deleted_items_from_older_meta,
        }
        try:
            with meta_path.open("w", encoding="utf-8") as fp:
                json.dump(meta_payload, fp, indent=2, ensure_ascii=False)
            logger.debug(f"Saved meta-info JSON to {meta_path}")
        except IOError as err:
            logger.error(f"Failed to save meta-info JSON to {meta_path}: {err}")

find_obj_with_lineno(file_node, start_line_num)

No valid docstring found.

Source code in repo_agent/doc_meta_info.py
def find_obj_with_lineno(self, file_node: "DocItem", start_line_num) -> "DocItem":
    """
    Locate the deepest item in a file's tree whose code span covers a line.

    Starting from ``file_node``, repeatedly descends into the first child
    whose ``content["code_start_line"]``/``content["code_end_line"]`` range
    contains ``start_line_num``; stops when no child covers the line.

    Args:
        file_node: Root DocItem of the file to search. Must not be None.
        start_line_num: Line number to locate (compared inclusively against
            each child's start/end lines).

    Returns:
        The deepest DocItem whose range contains the line; ``file_node``
        itself when no child covers it.
    """
    now_node = file_node
    assert now_node is not None
    while len(now_node.children) > 0:
        find_qualify_child = False
        for _, child in now_node.children.items():
            assert child.content is not None
            if (
                child.content["code_start_line"] <= start_line_num
                and child.content["code_end_line"] >= start_line_num
            ):
                # Descend into the first child containing the line.
                now_node = child
                find_qualify_child = True
                break
        if not find_qualify_child:
            return now_node
    return now_node

from_checkpoint_path(checkpoint_dir_path, repo_structure=None) staticmethod

Loads project metadata from a checkpoint directory to restore a previous state.

Parameters:

Name Type Description Default
checkpoint_dir_path Path

The path to the checkpoint directory.

required
repo_structure Optional[Dict[str, Any]]

An optional dictionary representing the repository structure.

None

Returns:

Name Type Description
MetaInfo MetaInfo

A MetaInfo object loaded from the checkpoint data.

Source code in repo_agent/doc_meta_info.py
@staticmethod
def from_checkpoint_path(
    checkpoint_dir_path: Path, repo_structure: Optional[Dict[str, Any]] = None
) -> MetaInfo:
    """
    Load project metadata from a checkpoint directory to restore a previous state.

    Reads ``project_hierarchy.json`` and ``meta-info.json`` from the
    checkpoint directory, rebuilds the hierarchy via
    ``from_project_hierarchy_json``, then restores version/process state
    from the meta file.

    Args:
        checkpoint_dir_path: The path to the checkpoint directory.
        repo_structure: An optional dictionary representing the repository structure.

    Returns:
        MetaInfo: A MetaInfo object loaded from the checkpoint data.
    """

    setting = SettingsManager.get_setting()
    project_hierarchy_json_path = checkpoint_dir_path / "project_hierarchy.json"
    with open(project_hierarchy_json_path, "r", encoding="utf-8") as reader:
        project_hierarchy_json = json.load(reader)
    metainfo = MetaInfo.from_project_hierarchy_json(
        project_hierarchy_json, repo_structure
    )
    with open(
        checkpoint_dir_path / "meta-info.json", "r", encoding="utf-8"
    ) as reader:
        meta_data = json.load(reader)
        # repo_path comes from current settings, not the checkpoint itself.
        metainfo.repo_path = setting.project.target_repo
        metainfo.main_idea = meta_data["main_idea"]
        metainfo.document_version = meta_data["doc_version"]
        metainfo.fake_file_reflection = meta_data["fake_file_reflection"]
        metainfo.jump_files = meta_data["jump_files"]
        metainfo.in_generation_process = meta_data["in_generation_process"]
        metainfo.deleted_items_from_older_meta = meta_data[
            "deleted_items_from_older_meta"
        ]
    print(f"{Fore.CYAN}Loading MetaInfo:{Style.RESET_ALL} {checkpoint_dir_path}")
    return metainfo

from_project_hierarchy_json(project_hierarchy_json, repo_structure=None) staticmethod

Constructs a hierarchical representation of the project by parsing file information and establishing relationships between documentation items, including handling potential naming conflicts and determining item types based on content.

This method processes file content and integrates it into a hierarchical tree, handling cases where files are missing, empty, or have duplicate names. It also determines item types (e.g., directory, file, class, function) based on content analysis and establishes parent-child relationships between items in the tree.

Parameters:

Name Type Description Default
project_hierarchy_json

A mapping from file paths to lists of object metadata dicts, as produced by the repository parser.

required
repo_structure Optional[Dict[str, Any]]

The existing repository structure (optional).

None

Returns:

Name Type Description
target_meta_info MetaInfo

The updated target_meta_info object with the populated or modified hierarchical tree.

Source code in repo_agent/doc_meta_info.py
@staticmethod
def from_project_hierarchy_json(
    project_hierarchy_json, repo_structure: Optional[Dict[str, Any]] = None
) -> MetaInfo:
    """
    Build a MetaInfo hierarchy from a parsed project-hierarchy JSON mapping.

    For each file entry the method grafts the file path into the repo tree
    (creating directory nodes as needed), instantiates one DocItem per object
    found in the file, infers parent/child relationships from code line
    ranges (the smallest enclosing span wins), renames duplicate child names,
    and finally assigns concrete item types (class / function /
    class_function / sub_function) based on each object's recorded AST type
    and its parent's type.

    Args:
        project_hierarchy_json: Mapping of file path -> list of object
            metadata dicts, as produced by the repository parser.
        repo_structure: Optional pre-parsed repository structure; when given,
            its entries supply ``source_node`` fallbacks instead of a deep
            copy of the JSON content.

    Returns:
        MetaInfo: The updated MetaInfo whose hierarchical tree covers all
        parsed files.
    """

    setting = SettingsManager.get_setting()
    target_meta_info = MetaInfo(
        repo_structure=project_hierarchy_json,
        target_repo_hierarchical_tree=DocItem(
            item_type=DocItemType._repo, obj_name="full_repo"
        ),
    )
    for file_name, file_content in tqdm(
        project_hierarchy_json.items(), desc="parsing parent relationship"
    ):
        # Skip entries whose backing file no longer exists in the repo.
        if not os.path.exists(os.path.join(setting.project.target_repo, file_name)):
            logger.info(f"deleted content: {file_name}")
            continue
        elif (
            os.path.getsize(os.path.join(setting.project.target_repo, file_name))
            == 0
            and file_content
            and (file_content[0]["type"] != "Dir")
        ):
            # Skip empty files; directories legitimately report size 0.
            logger.info(f"blank content: {file_name}")
            continue
        recursive_file_path = file_name.split("/")
        pos = 0
        now_structure = target_meta_info.target_repo_hierarchical_tree
        # Walk (and extend) the tree along the directory components of the path.
        while pos < len(recursive_file_path) - 1:
            if recursive_file_path[pos] not in now_structure.children.keys():
                now_structure.children[recursive_file_path[pos]] = DocItem(
                    item_type=DocItemType._dir,
                    # NOTE(review): md_content is typed List[str] on DocItem;
                    # "" relies on duck typing here — confirm intended.
                    md_content="",
                    obj_name=recursive_file_path[pos],
                )
                now_structure.children[recursive_file_path[pos]].father = (
                    now_structure
                )
            now_structure = now_structure.children[recursive_file_path[pos]]
            pos += 1
        # Create the leaf node (dir or file) if it is not present yet;
        # after the loop, pos == len(recursive_file_path) - 1.
        if recursive_file_path[-1] not in now_structure.children.keys():
            if file_content and file_content[0].get("type") == "Dir":
                doctype = DocItemType._dir
                now_structure.children[recursive_file_path[pos]] = DocItem(
                    item_type=doctype, obj_name=recursive_file_path[-1]
                )
                now_structure.children[recursive_file_path[pos]].father = (
                    now_structure
                )
            else:
                doctype = DocItemType._file
                now_structure.children[recursive_file_path[pos]] = DocItem(
                    item_type=doctype, obj_name=recursive_file_path[-1]
                )
                now_structure.children[recursive_file_path[pos]].father = (
                    now_structure
                )
        if repo_structure:
            actual_item = repo_structure[file_name]
        else:
            actual_item = deepcopy(file_content)
        assert type(file_content) == list
        file_item = target_meta_info.target_repo_hierarchical_tree.find(
            recursive_file_path
        )
        "用类线段树的方式:\n            1.先parse所有节点,再找父子关系\n            2.一个节点的父节点,所有包含他的code范围的节点里的,最小的节点\n            复杂度是O(n^2)\n            3.最后来处理节点的type问题\n            "
        # (Note above: segment-tree-like approach — parse all nodes, then pick
        # each node's father as the smallest enclosing code range; O(n^2).)
        obj_item_list: List[DocItem] = []
        for value, actual in zip(file_content, actual_item):
            # Prefer the source_node from the JSON entry; fall back to the
            # pre-parsed repo structure entry.
            if value.get("source_node"):
                source_node = value.get("source_node")
            else:
                source_node = actual.get("source_node")
            obj_doc_item = DocItem(
                obj_name=value["name"],
                content=value,
                md_content=value["md_content"],
                code_start_line=value.get("code_start_line"),
                code_end_line=value.get("code_end_line"),
                source_node=source_node,
            )
            if "item_status" in value.keys():
                obj_doc_item.item_status = DocItemStatus[value["item_status"]]
            if "reference_who" in value.keys():
                obj_doc_item.reference_who_name_list = value["reference_who"]
            if "special_reference_type" in value.keys():
                obj_doc_item.special_reference_type = value[
                    "special_reference_type"
                ]
            if "who_reference_me" in value.keys():
                obj_doc_item.who_reference_me_name_list = value["who_reference_me"]
            obj_item_list.append(obj_doc_item)
        # Father selection: smallest strictly-enclosing code range wins;
        # items with no enclosing item attach to the file node.
        for item in obj_item_list:
            potential_father = None
            for other_item in obj_item_list:

                def code_contain(item, other_item) -> bool:
                    # True iff other_item's range strictly contains item's range
                    # (identical ranges do not count as containment).
                    if (
                        other_item.code_end_line == item.code_end_line
                        and other_item.code_start_line == item.code_start_line
                    ):
                        return False
                    if (
                        other_item.code_end_line < item.code_end_line
                        or other_item.code_start_line > item.code_start_line
                    ):
                        return False
                    return True

                if code_contain(item, other_item):
                    if (
                        potential_father == None
                        or other_item.code_end_line - other_item.code_start_line
                        < potential_father.code_end_line
                        - potential_father.code_start_line
                    ):
                        potential_father = other_item
            if potential_father == None:
                potential_father = file_item
            item.father = potential_father
            child_name = item.obj_name
            # Disambiguate duplicate sibling names with a numeric suffix.
            if child_name in potential_father.children.keys():
                now_name_id = 0
                while (
                    child_name + f"_{now_name_id}"
                    in potential_father.children.keys()
                ):
                    now_name_id += 1
                child_name = child_name + f"_{now_name_id}"
                logger.warning(
                    f"Name duplicate in {file_item.get_full_name()}: rename to {item.obj_name}->{child_name}"
                )
            if potential_father.item_type != DocItemType._dir:
                potential_father.children[child_name] = item

        def change_items(now_item: DocItem):
            # Assign concrete item types from the recorded AST node types;
            # a FunctionDef nested in a class/function is reclassified.
            # The early return for dirs stops descent; its value is unused.
            if now_item.item_type == DocItemType._dir:
                return target_meta_info
            if now_item.item_type != DocItemType._file:
                if now_item.content["type"] == "ClassDef":
                    now_item.item_type = DocItemType._class
                elif now_item.content["type"] == "FunctionDef":
                    now_item.item_type = DocItemType._function
                    if now_item.father.item_type == DocItemType._class:
                        now_item.item_type = DocItemType._class_function
                    elif now_item.father.item_type in [
                        DocItemType._function,
                        DocItemType._sub_function,
                    ]:
                        now_item.item_type = DocItemType._sub_function
            for _, child in now_item.children.items():
                change_items(child)

        change_items(file_item)
    target_meta_info.target_repo_hierarchical_tree.parse_tree_path(now_path=[])
    target_meta_info.target_repo_hierarchical_tree.check_depth()
    return target_meta_info

from_project_hierarchy_path(repo_path) staticmethod

Loads a project hierarchy from a JSON file and converts it into a MetaInfo object.

Parameters:

Name Type Description Default
repo_path str

The path to the directory expected to contain project_hierarchy.json.

required

Returns:

Name Type Description
MetaInfo MetaInfo

A MetaInfo object reconstructed from the project hierarchy JSON.

Source code in repo_agent/doc_meta_info.py
@staticmethod
def from_project_hierarchy_path(repo_path: str) -> MetaInfo:
    """
    Load ``project_hierarchy.json`` from a directory and build a MetaInfo.

    Args:
        repo_path: Directory expected to contain ``project_hierarchy.json``.

    Returns:
        MetaInfo: Metadata reconstructed from the hierarchy JSON via
        ``from_project_hierarchy_json``.

    Raises:
        NotImplementedError: If ``project_hierarchy.json`` does not exist.
    """

    project_hierarchy_json_path = os.path.join(repo_path, "project_hierarchy.json")
    logger.info(f"parsing from {project_hierarchy_json_path}")
    if not os.path.exists(project_hierarchy_json_path):
        # NOTE(review): a missing file would conventionally raise
        # FileNotFoundError; kept as-is since callers may catch this type.
        raise NotImplementedError("Invalid operation detected")
    with open(project_hierarchy_json_path, "r", encoding="utf-8") as reader:
        project_hierarchy_json = json.load(reader)
    return MetaInfo.from_project_hierarchy_json(project_hierarchy_json)

get_all_files(count_repo=False)

Returns a list of all items – files and directories – within the project's structure. Optionally includes the root repository item in the results.

Parameters:

Name Type Description Default
count_repo

Whether to include the root repo item in the results.

False

Returns:

Type Description
List[DocItem]

A list of DocItem objects representing all files and directories.

Source code in repo_agent/doc_meta_info.py
def get_all_files(self, count_repo=False) -> List[DocItem]:
    """
    Collect every file and directory item in the repository tree.

    Performs a preorder depth-first walk from the root of the hierarchical
    tree, gathering items of type ``_file`` and ``_dir`` (and, optionally,
    the ``_repo`` root itself).

    Args:
        count_repo: Whether to also include the root repo item.

    Returns:
        A list of DocItem objects for all files and directories, in
        traversal order.
    """
    collected = []

    def visit(node):
        wanted = node.item_type in (DocItemType._file, DocItemType._dir) or (
            count_repo and node.item_type == DocItemType._repo
        )
        if wanted:
            collected.append(node)
        for child in node.children.values():
            visit(child)

    visit(self.target_repo_hierarchical_tree)
    return collected

get_task_manager(now_node, task_available_func)

Creates a task manager from documentation items, establishing dependencies based on references and the hierarchical structure of the code. Items can be filtered by a whitelist or a provided availability function. The process prioritizes tasks with fewer dependencies to resolve potential circular references.

Parameters:

Name Type Description Default
now_node DocItem

The root node of the hierarchical tree from which tasks are generated.

required
task_available_func

A function that determines whether a given documentation item should be included as a task. Can be None.

required

Returns:

Name Type Description
TaskManager TaskManager

A TaskManager object containing tasks generated from the documentation items, with dependencies based on references and children within the tree.

Source code in repo_agent/doc_meta_info.py
def get_task_manager(self, now_node: DocItem, task_available_func) -> TaskManager:
    """
    Create a TaskManager covering the documentation items under a node.

    Task dependencies follow each item's references and its children in the
    tree. Items may be filtered by the instance's white list and by
    ``task_available_func``. When circular references prevent a clean
    topological order, the item with the fewest unresolved non-special
    dependencies is scheduled next.

    Args:
        now_node: Root DocItem whose travel list is turned into tasks.
        task_available_func: Predicate deciding whether a DocItem becomes a
            task. Can be None.

    Returns:
        TaskManager: A TaskManager object containing tasks generated from the
            documentation items, with dependencies based on references and
            children within the tree.
    """

    doc_items = now_node.get_travel_list()
    if self.white_list != None:

        def in_white_list(item: DocItem):
            # Keep only items matching a (file_path, id_text) white-list entry.
            for cont in self.white_list:
                if (
                    item.get_file_name() == cont["file_path"]
                    and item.obj_name == cont["id_text"]
                ):
                    return True
            return False

        doc_items = list(filter(in_white_list, doc_items))
    doc_items = list(filter(task_available_func, doc_items))
    # Shallower items sort first before the dependency-driven selection below.
    doc_items = sorted(doc_items, key=lambda x: x.depth)
    deal_items = []
    task_manager = TaskManager()
    bar = tqdm(total=len(doc_items), desc="parsing topology task-list")
    while doc_items:
        min_break_level = 10000000.0
        target_item = None
        for item in doc_items:
            "一个任务依赖于所有引用者和他的子节点,我们不能保证引用不成环(也许有些仓库的废代码会出现成环)。\n                这时就只能选择一个相对来说遵守程度最好的了\n                有特殊情况func-def中的param def可能会出现循环引用\n                另外循环引用真实存在,对于一些bind类的接口真的会发生,比如:\n                ChatDev/WareHouse/Gomoku_HumanAgentInteraction_20230920135038/main.py里面的: on-click、show-winner、restart\n"
            # (Note above: a task depends on its referencers and children;
            # cycles can occur, so pick the item that best satisfies ordering.)
            # best_break_level counts ALL unresolved deps; second_best ignores
            # "special" references so cycles can still be broken.
            best_break_level = 0
            second_best_break_level = 0
            for _, child in item.children.items():
                if task_available_func(child) and child not in deal_items:
                    best_break_level += 1
            for referenced, special in zip(
                item.reference_who, item.special_reference_type
            ):
                if task_available_func(referenced) and referenced not in deal_items:
                    best_break_level += 1
                if (
                    task_available_func(referenced)
                    and (not special)
                    and (referenced not in deal_items)
                ):
                    second_best_break_level += 1
            if best_break_level == 0:
                # Fully unblocked item: take it immediately.
                min_break_level = -1
                target_item = item
                break
            if second_best_break_level < min_break_level:
                target_item = item
                min_break_level = second_best_break_level
        if min_break_level > 0:
            print(
                f"circle-reference(second-best still failed), level={min_break_level}: {target_item.get_full_name()}"
            )
        # Dependencies: already-scheduled children plus referenced items whose
        # task ids are present in the manager.
        item_denp_task_ids = []
        for _, child in target_item.children.items():
            if child.multithread_task_id != -1:
                item_denp_task_ids.append(child.multithread_task_id)
        for referenced_item in target_item.reference_who:
            if referenced_item.multithread_task_id in task_manager.task_dict.keys():
                item_denp_task_ids.append(referenced_item.multithread_task_id)
        item_denp_task_ids = list(set(item_denp_task_ids))
        if task_available_func == None or task_available_func(target_item):
            task_id = task_manager.add_task(
                dependency_task_id=item_denp_task_ids, extra=target_item
            )
            target_item.multithread_task_id = task_id
        deal_items.append(target_item)
        doc_items.remove(target_item)
        bar.update(1)
    return task_manager

get_topology(task_available_func)

Builds a topologically ordered task list for the repository's documentation items, parsing bidirectional references first.

Parameters:

Name Type Description Default
task_available_func

A function that determines whether a given documentation item should be included as a task.

required

Returns:

Type Description
TaskManager

A TaskManager whose tasks respect reference and hierarchy dependencies.

Source code in repo_agent/doc_meta_info.py
def get_topology(self, task_available_func) -> TaskManager:
    """
    Build a topologically ordered task list for documentation generation.

    Parses bidirectional references across the repository first, then
    delegates to ``get_task_manager`` on the root of the hierarchical tree.

    Args:
        task_available_func: Predicate deciding which DocItems become tasks.

    Returns:
        TaskManager: Tasks ordered to respect reference and hierarchy
        dependencies.
    """

    self.parse_reference()
    task_manager = self.get_task_manager(
        self.target_repo_hierarchical_tree, task_available_func=task_available_func
    )
    return task_manager

init_meta_info(file_path_reflections, jump_files) staticmethod

Creates a MetaInfo object representing the project’s structure by analyzing files and directories.

Parameters:

Name Type Description Default
file_path_reflections

The file path reflections to use.

required
jump_files

The jump files to use.

required

Returns:

Name Type Description
MetaInfo MetaInfo

A MetaInfo object representing the project's metadata.

Source code in repo_agent/doc_meta_info.py
@staticmethod
def init_meta_info(file_path_reflections, jump_files) -> MetaInfo:
    """
    Build a fresh MetaInfo by scanning the target repository's structure.

    Generates the overall repository structure with a FileHandler, converts
    it into a MetaInfo via ``from_project_hierarchy_json``, and records the
    reflections and jump files on the result.

    Args:
        file_path_reflections: The file-path reflections to use.
        jump_files: The jump files to use.

    Returns:
        MetaInfo: A MetaInfo object representing the project's metadata.
    """
    setting = SettingsManager.get_setting()
    repo_root = setting.project.target_repo
    print(
        f"{Fore.LIGHTRED_EX}Initializing MetaInfo: {Style.RESET_ALL}from {repo_root}"
    )
    handler = FileHandler(repo_root, None)
    overall_structure = handler.generate_overall_structure(
        file_path_reflections, jump_files
    )
    metainfo = MetaInfo.from_project_hierarchy_json(overall_structure)
    metainfo.repo_path = repo_root
    metainfo.fake_file_reflection = file_path_reflections
    metainfo.jump_files = jump_files
    return metainfo

load_doc_from_older_meta(older_meta)

No valid docstring found.

Source code in repo_agent/doc_meta_info.py
def load_doc_from_older_meta(self, older_meta: MetaInfo):
    """
    Merge documentation state from an older MetaInfo into this one.

    Walks the older hierarchy, copying each item's generated docs and status
    onto the matching item in the current tree (matched by the path of child
    names from the root). Items missing from the current tree are recorded in
    ``self.deleted_items_from_older_meta``. After re-parsing references,
    up-to-date items whose referencer set changed are flagged as
    ``referencer_not_exist`` or ``add_new_referencer``.

    Args:
        older_meta: The older MetaInfo object to merge from.
    """
    logger.info("merge doc from an older version of metainfo")
    root_item = self.target_repo_hierarchical_tree
    deleted_items = []

    def find_item(now_item: DocItem) -> Optional[DocItem]:
        # Map an item from the older tree to its counterpart in the current
        # tree by recursively resolving its father, then looking up the
        # child key the father stored it under (may differ from obj_name
        # after duplicate renaming).
        nonlocal root_item
        if now_item.father == None:
            return root_item
        father_find_result = find_item(now_item.father)
        if not father_find_result:
            return None
        real_name = None
        for child_real_name, temp_item in now_item.father.children.items():
            if temp_item == now_item:
                real_name = child_real_name
                break
        assert real_name != None
        if real_name in father_find_result.children.keys():
            result_item = father_find_result.children[real_name]
            return result_item
        return None

    def travel(now_older_item: DocItem):
        # Pass 1: copy docs/status; mark code_changed when the code text
        # (ignoring docstrings) differs from the older version.
        result_item = find_item(now_older_item)
        if not result_item:
            deleted_items.append(
                [now_older_item.get_full_name(), now_older_item.item_type.name]
            )
            return
        result_item.md_content = now_older_item.md_content
        result_item.item_status = now_older_item.item_status
        if "code_content" in now_older_item.content.keys():
            assert "code_content" in result_item.content.keys()
            if remove_docstrings(
                now_older_item.content["code_content"]
            ) != remove_docstrings(result_item.content["code_content"]):
                result_item.item_status = DocItemStatus.code_changed
        for _, child in now_older_item.children.items():
            travel(child)

    travel(older_meta.target_repo_hierarchical_tree)
    "接下来,parse现在的双向引用,观察谁的引用者改了"
    # (Note above: now parse current bidirectional references and see whose
    # referencers changed.)
    self.parse_reference()

    def travel2(now_older_item: DocItem):
        # Pass 2: compare referencer name sets; only items currently marked
        # doc_up_to_date are re-flagged.
        result_item = find_item(now_older_item)
        if not result_item:
            return
        "result_item引用的人是否变化了"
        new_reference_names = [
            name.get_full_name(strict=True) for name in result_item.who_reference_me
        ]
        old_reference_names = now_older_item.who_reference_me_name_list
        if (
            not set(new_reference_names) == set(old_reference_names)
            and result_item.item_status == DocItemStatus.doc_up_to_date
        ):
            if set(new_reference_names) <= set(old_reference_names):
                result_item.item_status = DocItemStatus.referencer_not_exist
            else:
                result_item.item_status = DocItemStatus.add_new_referencer
        for _, child in now_older_item.children.items():
            travel2(child)

    travel2(older_meta.target_repo_hierarchical_tree)
    self.deleted_items_from_older_meta = deleted_items

parse_reference()

No valid docstring found.

Source code in repo_agent/doc_meta_info.py
def parse_reference(self):
    """
    Build bidirectional reference relationships between all documented objects.

    For every file node in the repository tree, walks each contained object
    and uses ``find_all_referencer`` (jedi-backed) to locate its referencers,
    then links both sides through ``reference_who`` / ``who_reference_me``
    and records whether the reference sits on a function-definition line
    (``special_reference_type``).

    Skipped references:
      * referencers located in unstaged (fake-file) versions,
      * referencers located in untracked (jump) files,
      * referencers that resolve outside the target repository tree,
      * same-name duplicates reported by jedi,
      * pairs that already have a direct ancestor/descendant relation.
    """
    file_nodes = self.get_all_files()
    white_list_file_names, white_list_obj_names = ([], [])
    if self.white_list is not None:
        white_list_file_names = [cont["file_path"] for cont in self.white_list]
        white_list_obj_names = [cont["id_text"] for cont in self.white_list]
    # Hoisted loop-invariant: the setting cannot change while parsing, so do
    # not re-read it for every object in every file.
    parse_refs_enabled = SettingsManager().get_setting().project.parse_references
    for file_node in tqdm(file_nodes, desc="parsing bidirectional reference"):
        # Only references to objects defined inside this file are detected here.
        # 1. A jump-file must never appear in this loop.
        # 2. References originating from a jump-file are ignored.
        # 3. References originating from a fake-file are attributed to the
        #    original file it shadows.
        assert not file_node.get_full_name().endswith(latest_verison_substring)
        ref_count = 0
        rel_file_path = file_node.get_full_name()
        assert rel_file_path not in self.jump_files
        if (
            white_list_file_names != []
            and file_node.get_file_name() not in white_list_file_names
        ):
            continue

        def walk_file(now_obj: DocItem):
            """Recursively collect referencers for every object in this file."""
            nonlocal ref_count
            # Objects outside the whitelist are only searched within their own
            # file, which keeps the jedi query cheap.
            in_file_only = (
                white_list_obj_names != []
                and now_obj.obj_name not in white_list_obj_names
            )
            if parse_refs_enabled:
                reference_list = find_all_referencer(
                    repo_path=self.repo_path,
                    variable_name=now_obj.obj_name,
                    file_path=rel_file_path,
                    line_number=now_obj.content["code_start_line"],
                    column_number=now_obj.content["name_column"],
                    in_file_only=in_file_only,
                )
            else:
                reference_list = []
            for referencer_pos in reference_list:
                referencer_file_ral_path = referencer_pos[0]
                if referencer_file_ral_path in self.fake_file_reflection.values():
                    # Referencer comes from an unstaged version; skip it.
                    print(
                        f"{Fore.LIGHTBLUE_EX}[Reference From Unstaged Version, skip]{Style.RESET_ALL} {referencer_file_ral_path} -> {now_obj.get_full_name()}"
                    )
                    continue
                elif referencer_file_ral_path in self.jump_files:
                    # Referencer comes from an untracked file; skip it.
                    # (typo fix: "Unstracked" -> "Untracked" in the log message)
                    print(
                        f"{Fore.LIGHTBLUE_EX}[Reference From Untracked Version, skip]{Style.RESET_ALL} {referencer_file_ral_path} -> {now_obj.get_full_name()}"
                    )
                    continue
                target_file_hiera = referencer_file_ral_path.split("/")
                referencer_file_item = self.target_repo_hierarchical_tree.find(
                    target_file_hiera
                )
                if referencer_file_item is None:
                    print(
                        f'{Fore.LIGHTRED_EX}Error: Find "{referencer_file_ral_path}"(not in target repo){Style.RESET_ALL} referenced {now_obj.get_full_name()}'
                    )
                    continue
                referencer_node = self.find_obj_with_lineno(
                    referencer_file_item, referencer_pos[1]
                )
                if referencer_node.obj_name == now_obj.obj_name:
                    # jedi reported a same-name hit (likely the definition
                    # itself); skip to avoid self-references.
                    logger.info(
                        f"Jedi find {now_obj.get_full_name()} with name_duplicate_reference, skipped"
                    )
                    continue
                if DocItem.has_ans_relation(now_obj, referencer_node) is None:
                    # Only unrelated (non-ancestor/descendant) pairs form a
                    # reference edge.
                    if now_obj not in referencer_node.reference_who:
                        # A reference sitting exactly on a function's own
                        # definition line is flagged as "special".
                        special_reference_type = (
                            referencer_node.item_type
                            in [
                                DocItemType._function,
                                DocItemType._sub_function,
                                DocItemType._class_function,
                            ]
                            and referencer_node.code_start_line == referencer_pos[1]
                        )
                        referencer_node.special_reference_type.append(
                            special_reference_type
                        )
                        referencer_node.reference_who.append(now_obj)
                        now_obj.who_reference_me.append(referencer_node)
                        ref_count += 1
            for _, child in now_obj.children.items():
                walk_file(child)

        for _, child in file_node.children.items():
            walk_file(child)

print_task_list(task_dict)

Displays a table summarizing task details, including ID, reason for documentation generation, file path, and dependencies. Dependency lists are truncated for brevity if they exceed a certain length.

Parameters:

Name Type Description Default
task_dict Dict[Task]

A dictionary where keys are task IDs and values are Task objects.

required

Returns:

Type Description

None

Source code in repo_agent/doc_meta_info.py
def print_task_list(self, task_dict: Dict[Any, Task]):
    """
    Print a table summarizing every task in *task_dict*.

    Columns: task id, the doc-generation reason (the item's status name), the
    item's full path, and the ids of the tasks it depends on. Dependency-id
    strings longer than 20 characters are shortened to "first8...last8".

    Args:
        task_dict: Mapping from task id to Task object.
            (Annotation fixed: ``Dict[Task]`` is an invalid single-parameter
            subscription of ``typing.Dict`` and raises TypeError when
            evaluated; ``Dict[Any, Task]`` is the intended mapping type.)

    Returns:
        None
    """
    task_table = PrettyTable(
        ["task_id", "Doc Generation Reason", "Path", "dependency"]
    )
    for task_id, task_info in task_dict.items():
        remain_str = "None"  # shown when the task has no dependencies
        if task_info.dependencies != []:
            remain_str = ",".join(
                [str(d_task.task_id) for d_task in task_info.dependencies]
            )
            if len(remain_str) > 20:
                # Truncate long dependency lists for table readability.
                remain_str = remain_str[:8] + "..." + remain_str[-8:]
        task_table.add_row(
            [
                task_id,
                task_info.extra_info.item_status.name,
                task_info.extra_info.get_full_name(strict=True),
                remain_str,
            ]
        )
    print(task_table)

to_hierarchy_json(flash_reference_relation=False)

Transforms a project’s documentation structure into a JSON representation, detailing relationships and content metadata for each item. It recursively processes files and directories to build a hierarchical view of the documented elements.

Parameters:

Name Type Description Default
flash_reference_relation

A boolean indicating whether to use full names for references.

False

Returns:

Name Type Description
dict

A dictionary representing the document hierarchy in JSON format.

Source code in repo_agent/doc_meta_info.py
def to_hierarchy_json(self, flash_reference_relation=False):
    """
    Serialize the documentation tree into a hierarchy-JSON dictionary.

    Walks every file (and directory) item returned by ``get_all_files`` and
    flattens each file's objects into a list of metadata dicts, keyed by the
    file's full path.

    Args:
        flash_reference_relation: If True, embed the live reference relations
            as full names (plus ``special_reference_type``); otherwise use the
            previously stored name lists.

    Returns:
        dict: Mapping of file/directory full path to a list of item dicts.
    """
    hierachy_json = {}
    file_item_list = self.get_all_files()
    for file_item in file_item_list:
        file_hierarchy_content = []

        def walk_file(now_obj: DocItem):
            nonlocal file_hierarchy_content, flash_reference_relation
            # Work on a shallow copy: the original code popped "source_node"
            # and injected serialization keys directly into now_obj.content,
            # permanently mutating the document tree as a side effect.
            temp_json_obj = dict(now_obj.content)
            # AST nodes are not JSON-serializable; drop them if present.
            temp_json_obj.pop("source_node", None)
            temp_json_obj["name"] = now_obj.obj_name
            temp_json_obj["type"] = now_obj.item_type.to_str()
            temp_json_obj["md_content"] = now_obj.md_content
            temp_json_obj["item_status"] = now_obj.item_status.name
            if flash_reference_relation:
                temp_json_obj["who_reference_me"] = [
                    cont.get_full_name(strict=True)
                    for cont in now_obj.who_reference_me
                ]
                temp_json_obj["reference_who"] = [
                    cont.get_full_name(strict=True)
                    for cont in now_obj.reference_who
                ]
                temp_json_obj["special_reference_type"] = (
                    now_obj.special_reference_type
                )
            else:
                temp_json_obj["who_reference_me"] = (
                    now_obj.who_reference_me_name_list
                )
                temp_json_obj["reference_who"] = now_obj.reference_who_name_list
            file_hierarchy_content.append(temp_json_obj)
            for _, child in now_obj.children.items():
                walk_file(child)

        for _, child in file_item.children.items():
            walk_file(child)
        if file_item.item_type == DocItemType._dir:
            # Directories carry only their own metadata, not child content.
            temp_json_obj = {}
            temp_json_obj["name"] = file_item.obj_name
            temp_json_obj["type"] = file_item.item_type.to_str()
            temp_json_obj["md_content"] = file_item.md_content
            temp_json_obj["item_status"] = file_item.item_status.name
            hierachy_json[file_item.get_full_name()] = [temp_json_obj]
        else:
            hierachy_json[file_item.get_full_name()] = file_hierarchy_content
    return hierachy_json

find_all_referencer(repo_path, variable_name, file_path, line_number, column_number, in_file_only=False)

Locates all uses of a variable throughout the codebase.

Parameters:

Name Type Description Default
variable_name

The name of the variable to find references for.

required
file_path

The path to the file where the variable is defined.

required
line_number

The line number where the variable is defined.

required
column_number

The column number where the variable is defined.

required
in_file_only

If True, limits the search to within the defining file.

False

Returns:

Name Type Description

A list of tuples, each containing the relative path to a referencing file, and the line and column numbers of the reference. The original definition location is excluded. Returns an empty list if any error occurs during processing.

Args

variable_name: The name of the variable to search for.
file_path: The path to the file containing the variable.
line_number: The line number where the variable is defined.
column_number: The column number where the variable is defined.
in_file_only: If True, only searches for references within the same file.

Returns

list[tuple[str, int, int]]: A list of tuples containing the relative path to the referencing file, the line number, and the column number of each reference, excluding the original definition location. Returns an empty list if an error occurs.

Source code in repo_agent/doc_meta_info.py
def find_all_referencer(
    repo_path, variable_name, file_path, line_number, column_number, in_file_only=False
):
    """
    Find all references to a variable across the repository using jedi.

    Args:
        repo_path: Absolute path to the repository root.
        variable_name: The name of the variable to find references for.
        file_path: Path (relative to repo_path) of the defining file.
        line_number: The line number where the variable is defined.
        column_number: The column number where the variable is defined.
        in_file_only: If True, limits the search to the defining file.

    Returns:
        list[tuple[str, int, int]]: (relative file path, line, column) of each
        reference, excluding the original definition location. Returns an
        empty list if any error occurs during processing.
    """
    try:
        # Construct the Script inside the try-block: the original version
        # built it outside, so file-open/parse errors escaped even though
        # the contract promises an empty list on any failure.
        script = jedi.Script(path=os.path.join(repo_path, file_path))
        if in_file_only:
            references = script.get_references(
                line=line_number, column=column_number, scope="file"
            )
        else:
            references = script.get_references(line=line_number, column=column_number)
        variable_references = [ref for ref in references if ref.name == variable_name]
        return [
            (os.path.relpath(ref.module_path, repo_path), ref.line, ref.column)
            for ref in variable_references
            # Exclude the definition site itself.
            if not (ref.line == line_number and ref.column == column_number)
        ]
    except Exception as e:
        logger.error(f"Error occurred: {e}")
        logger.error(
            f"Parameters: variable_name={variable_name}, file_path={file_path}, line_number={line_number}, column_number={column_number}"
        )
        return []

need_to_generate(doc_item, ignore_list=[])

No valid docstring found.

Source code in repo_agent/doc_meta_info.py
def need_to_generate(doc_item: DocItem, ignore_list: Optional[List[str]] = None) -> bool:
    """
    Decide whether documentation should be generated for *doc_item*.

    An item needs generation only when its docs are not up to date, it is
    contained in a file (file/dir/repo nodes themselves are skipped because
    the walk starts at the item's father), and its full path does not start
    with any prefix in *ignore_list*.

    Args:
        doc_item: The item to check.
        ignore_list: Path prefixes to exclude. Defaults to no exclusions.
            (Fixed: the original used a mutable default argument ``[]``.)

    Returns:
        bool: True if documentation should be generated for this item.
    """
    if ignore_list is None:
        ignore_list = []
    if doc_item.item_status == DocItemStatus.doc_up_to_date:
        return False
    rel_file_path = doc_item.get_full_name()
    parent = doc_item.father
    while parent:
        if parent.item_type == DocItemType._file:
            # The item belongs to a file: generate unless that path is ignored.
            return not any(
                rel_file_path.startswith(ignore_item) for ignore_item in ignore_list
            )
        parent = parent.father
    # No enclosing file node found (e.g. the node is itself a file/dir/repo).
    return False