Skip to content

Feature/arch analysis #18356

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions .github/scripts/analyze_architecture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import os
import re
from collections import defaultdict
import networkx as nx
import matplotlib.pyplot as plt

class KafkaArchitectureAnalyzer:
def __init__(self):
self.dependency_graph = nx.DiGraph()
self.package_dependencies = defaultdict(set)
self.violations = []

def analyze_java_file(self, file_path):
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()

# Extract package name
package_match = re.search(r'package\s+([\w.]+);', content)
if not package_match:
return

current_package = package_match.group(1)

# Extract imports
imports = re.findall(r'import\s+([\w.]+\*?);', content)

# Add dependencies to graph
for imp in imports:
if imp.startswith('org.apache.kafka'):
base_package = '.'.join(imp.split('.')[:4]) # Get main package
if base_package != current_package:
self.package_dependencies[current_package].add(base_package)
self.dependency_graph.add_edge(current_package, base_package)

# Check for architectural violations
self.check_violations(current_package, base_package)

def check_violations(self, source, target):
# Define architectural rules
client_packages = ['org.apache.kafka.clients']
server_packages = ['org.apache.kafka.server']
common_packages = ['org.apache.kafka.common']

# Rule 1: Clients should not depend on server internals
if any(source.startswith(p) for p in client_packages):
if any(target.startswith(p) for p in server_packages):
self.violations.append(f"Violation: Client package {source} should not depend on server package {target}")

# Rule 2: Server can depend on common
# Rule 3: Everyone can depend on common
# These are allowed, so no checks needed

def analyze_directory(self, root_dir):
for dirpath, _, filenames in os.walk(root_dir):
for filename in filenames:
if filename.endswith('.java'):
file_path = os.path.join(dirpath, filename)
self.analyze_java_file(file_path)

def generate_report(self):
# Create report
report = ["=== Kafka Architecture Analysis Report ===\n"]

# Add dependency statistics
report.append("\n=== Package Dependencies ===")
for pkg, deps in self.package_dependencies.items():
report.append(f"\n{pkg} depends on:")
for dep in deps:
report.append(f" - {dep}")

# Add violations
if self.violations:
report.append("\n=== Architectural Violations ===")
for violation in self.violations:
report.append(violation)
else:
report.append("\n=== No Architectural Violations Found ===")

# Save report
with open('architecture_report.txt', 'w') as f:
f.write('\n'.join(report))

# Generate visualization
plt.figure(figsize=(15, 15))
pos = nx.spring_layout(self.dependency_graph)
nx.draw(self.dependency_graph, pos, with_labels=True, node_color='lightblue',
node_size=2000, font_size=8, font_weight='bold', arrows=True)
plt.savefig('dependency_graph.png')
plt.close()

def main():
analyzer = KafkaArchitectureAnalyzer()

# Analyze main Kafka directories
kafka_dirs = [
'core/src/main/java',
'clients/src/main/java',
'connect/src/main/java',
'streams/src/main/java'
]

for directory in kafka_dirs:
if os.path.exists(directory):
analyzer.analyze_directory(directory)

analyzer.generate_report()

if __name__ == "__main__":
main()
36 changes: 36 additions & 0 deletions .github/workflows/architecture-analysis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: Kafka Architecture Analysis

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]

jobs:
analyze:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install networkx pydot matplotlib

- name: Analyze Architecture
run: |
python .github/scripts/analyze_architecture.py

- name: Upload Analysis Results
uses: actions/upload-artifact@v3
with:
name: architecture-analysis
path: |
architecture_report.txt
dependency_graph.png
49 changes: 49 additions & 0 deletions .github/workflows/architecture.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
name: Generate Kafka Architecture Diagram

on:
push:
branches: [ trunk ]
workflow_dispatch:

permissions:
contents: write

jobs:
generate:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
fetch-depth: 0
ref: trunk

- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.10'
cache: 'pip'

- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y graphviz graphviz-dev
python -m pip install --upgrade pip
pip install diagrams

- name: Create architecture diagram
run: |
python architecture_generator.py
ls -la

- name: Configure Git
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"

- name: Commit and push changes
run: |
git pull --rebase origin trunk
git add -f kafka_architecture.png
git commit -m "Add Kafka architecture diagram [skip ci]" || echo "No changes to commit"
git push
86 changes: 86 additions & 0 deletions architecture_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from diagrams import Diagram, Cluster, Edge
from diagrams.programming.language import Java
from diagrams.programming.framework import Spring
from diagrams.onprem.queue import Kafka
from diagrams.onprem.client import Client
from diagrams.generic.storage import Storage
from diagrams.onprem.compute import Server
from diagrams.onprem.monitoring import Grafana

def generate_architecture():
"""Generate architecture diagram for Apache Kafka."""

graph_attr = {
"fontsize": "45",
"bgcolor": "white",
"splines": "ortho",
"pad": "2.0",
"ranksep": "1.5",
"nodesep": "1.0"
}

node_attr = {
"fontsize": "14"
}

edge_attr = {
"fontsize": "12"
}

with Diagram(
"Apache Kafka Architecture",
show=False,
direction="LR", # Changed to left-to-right
graph_attr=graph_attr,
node_attr=node_attr,
edge_attr=edge_attr,
filename="kafka_architecture"
):
with Cluster("Kafka Cluster"):
brokers = [
Kafka("Broker 1"),
Kafka("Broker 2"),
Kafka("Broker 3")
]

kafka_connect = Spring("Kafka Connect")
streams = Java("Kafka Streams")

with Cluster("Storage & Coordination"):
zk = Grafana("ZooKeeper")
storage = Storage("Log Storage")

with Cluster("Clients"):
producers = [
Client("Producer 1"),
Client("Producer 2")
]

consumers = [
Client("Consumer 1"),
Client("Consumer 2")
]

# Connect ZooKeeper to brokers with better spacing
zk >> Edge(color="red", style="dashed", minlen="2", label="coordination") >> brokers[0]
zk >> Edge(color="red", style="dashed", minlen="2") >> brokers[1]
zk >> Edge(color="red", style="dashed", minlen="2") >> brokers[2]

# Connect brokers to storage with better spacing
for broker in brokers:
broker >> Edge(color="blue", minlen="2", label="persist") >> storage

# Connect producers to brokers with better spacing
for producer in producers:
producer >> Edge(color="green", minlen="2", label="produce") >> brokers[0]

# Connect consumers to brokers with better spacing
for consumer in consumers:
brokers[2] >> Edge(color="purple", minlen="2", label="consume") >> consumer

# Connect Kafka Connect and Streams with better spacing
kafka_connect >> Edge(color="orange", minlen="2", label="connect") >> brokers[1]
streams >> Edge(color="yellow", minlen="2", label="process") >> brokers[1]

if __name__ == "__main__":
generate_architecture()
Binary file added kafka_architecture.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading