diff --git a/.github/scripts/analyze_architecture.py b/.github/scripts/analyze_architecture.py new file mode 100644 index 0000000000000..a717ee8ac627a --- /dev/null +++ b/.github/scripts/analyze_architecture.py @@ -0,0 +1,109 @@ +import os +import re +from collections import defaultdict +import networkx as nx +import matplotlib.pyplot as plt + +class KafkaArchitectureAnalyzer: + def __init__(self): + self.dependency_graph = nx.DiGraph() + self.package_dependencies = defaultdict(set) + self.violations = [] + + def analyze_java_file(self, file_path): + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Extract package name + package_match = re.search(r'package\s+([\w.]+);', content) + if not package_match: + return + + current_package = package_match.group(1) + + # Extract imports + imports = re.findall(r'import\s+([\w.]+\*?);', content) + + # Add dependencies to graph + for imp in imports: + if imp.startswith('org.apache.kafka'): + base_package = '.'.join(imp.split('.')[:4]) # Get main package + if base_package != current_package: + self.package_dependencies[current_package].add(base_package) + self.dependency_graph.add_edge(current_package, base_package) + + # Check for architectural violations + self.check_violations(current_package, base_package) + + def check_violations(self, source, target): + # Define architectural rules + client_packages = ['org.apache.kafka.clients'] + server_packages = ['org.apache.kafka.server'] + common_packages = ['org.apache.kafka.common'] + + # Rule 1: Clients should not depend on server internals + if any(source.startswith(p) for p in client_packages): + if any(target.startswith(p) for p in server_packages): + self.violations.append(f"Violation: Client package {source} should not depend on server package {target}") + + # Rule 2: Server can depend on common + # Rule 3: Everyone can depend on common + # These are allowed, so no checks needed + + def analyze_directory(self, root_dir): + for dirpath, _, filenames in os.walk(root_dir): + for filename in filenames: + if filename.endswith('.java'): + file_path = os.path.join(dirpath, filename) + self.analyze_java_file(file_path) + + def generate_report(self): + # Create report + report = ["=== Kafka Architecture Analysis Report ===\n"] + + # Add dependency statistics + report.append("\n=== Package Dependencies ===") + for pkg, deps in self.package_dependencies.items(): + report.append(f"\n{pkg} depends on:") + for dep in deps: + report.append(f" - {dep}") + + # Add violations + if self.violations: + report.append("\n=== Architectural Violations ===") + for violation in self.violations: + report.append(violation) + else: + report.append("\n=== No Architectural Violations Found ===") + + # Save report + with open('architecture_report.txt', 'w') as f: + f.write('\n'.join(report)) + + # Generate visualization + plt.figure(figsize=(15, 15)) + pos = nx.spring_layout(self.dependency_graph) + nx.draw(self.dependency_graph, pos, with_labels=True, node_color='lightblue', + node_size=2000, font_size=8, font_weight='bold', arrows=True) + plt.savefig('dependency_graph.png') + plt.close() + +def main(): + analyzer = KafkaArchitectureAnalyzer() + + # Analyze main Kafka directories + kafka_dirs = [ + 'core/src/main/java', + 'clients/src/main/java', + 'connect/src/main/java', + 'streams/src/main/java' + ] + + for directory in kafka_dirs: + if os.path.exists(directory): + analyzer.analyze_directory(directory) + + analyzer.generate_report() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/.github/workflows/architecture-analysis.yml b/.github/workflows/architecture-analysis.yml new file mode 100644 index 0000000000000..0e28b5aa110bb --- /dev/null +++ b/.github/workflows/architecture-analysis.yml @@ -0,0 +1,36 @@ +name: Kafka Architecture Analysis + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + analyze: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install networkx pydot matplotlib + + - name: Analyze Architecture + run: | + python .github/scripts/analyze_architecture.py + + - name: Upload Analysis Results + uses: actions/upload-artifact@v3 + with: + name: architecture-analysis + path: | + architecture_report.txt + dependency_graph.png \ No newline at end of file diff --git a/.github/workflows/architecture.yml b/.github/workflows/architecture.yml new file mode 100644 index 0000000000000..cff627c9f49ed --- /dev/null +++ b/.github/workflows/architecture.yml @@ -0,0 +1,49 @@ +name: Generate Kafka Architecture Diagram + +on: + push: + branches: [ trunk ] + workflow_dispatch: + +permissions: + contents: write + +jobs: + generate: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + ref: trunk + + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + cache: 'pip' + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y graphviz graphviz-dev + python -m pip install --upgrade pip + pip install diagrams + + - name: Create architecture diagram + run: | + python architecture_generator.py + ls -la + + - name: Configure Git + run: | + git config --local user.email "github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + + - name: Commit and push changes + run: | + git pull --rebase origin trunk + git add -f kafka_architecture.png + git commit -m "Add Kafka architecture diagram [skip ci]" || echo "No changes to commit" + git push diff --git a/architecture_generator.py b/architecture_generator.py new file mode 100644 index 0000000000000..f7d71160ab910 --- /dev/null +++ b/architecture_generator.py @@ -0,0 +1,86 @@ +from diagrams import Diagram, Cluster, Edge +from diagrams.programming.language import Java +from diagrams.programming.framework import Spring +from diagrams.onprem.queue import Kafka +from diagrams.onprem.client import Client +from diagrams.generic.storage import Storage +from diagrams.onprem.compute import Server +from diagrams.onprem.monitoring import Grafana + +def generate_architecture(): + """Generate architecture diagram for Apache Kafka.""" + + graph_attr = { + "fontsize": "45", + "bgcolor": "white", + "splines": "ortho", + "pad": "2.0", + "ranksep": "1.5", + "nodesep": "1.0" + } + + node_attr = { + "fontsize": "14" + } + + edge_attr = { + "fontsize": "12" + } + + with Diagram( + "Apache Kafka Architecture", + show=False, + direction="LR", # Changed to left-to-right + graph_attr=graph_attr, + node_attr=node_attr, + edge_attr=edge_attr, + filename="kafka_architecture" + ): + with Cluster("Kafka Cluster"): + brokers = [ + Kafka("Broker 1"), + Kafka("Broker 2"), + Kafka("Broker 3") + ] + + kafka_connect = Spring("Kafka Connect") + streams = Java("Kafka Streams") + + with Cluster("Storage & Coordination"): + zk = Grafana("ZooKeeper") + storage = Storage("Log Storage") + + with Cluster("Clients"): + producers = [ + Client("Producer 1"), + Client("Producer 2") + ] + + consumers = [ + Client("Consumer 1"), + Client("Consumer 2") + ] + + # Connect ZooKeeper to brokers with better spacing + zk >> Edge(color="red", style="dashed", minlen="2", label="coordination") >> brokers[0] + zk >> Edge(color="red", style="dashed", minlen="2") >> brokers[1] + zk >> Edge(color="red", style="dashed", minlen="2") >> brokers[2] + + # Connect brokers to storage with better spacing + for broker in brokers: + broker >> Edge(color="blue", minlen="2", label="persist") >> storage + + # Connect producers to brokers with better spacing + for producer in producers: + producer >> Edge(color="green", minlen="2", label="produce") >> brokers[0] + + # Connect consumers to brokers with better spacing + for consumer in consumers: + brokers[2] >> Edge(color="purple", minlen="2", label="consume") >> consumer + + # Connect Kafka Connect and Streams with better spacing + kafka_connect >> Edge(color="orange", minlen="2", label="connect") >> brokers[1] + streams >> Edge(color="yellow", minlen="2", label="process") >> brokers[1] + +if __name__ == "__main__": + generate_architecture() \ No newline at end of file diff --git a/kafka_architecture.png b/kafka_architecture.png new file mode 100644 index 0000000000000..643e3afd0f1d3 Binary files /dev/null and b/kafka_architecture.png differ