diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..e0f15db2e --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "java.configuration.updateBuildConfiguration": "automatic" +} \ No newline at end of file diff --git a/README.md b/README.md index 4aa6eeb3a..b144273a2 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,64 @@ rates below are specified as *records/second*. | High (167 Directives) | 426 | 127,946,398 | 82,677,845,324 | 106,367.27 | | High (167 Directives) | 426 | 511,785,592 | 330,711,381,296 | 105,768.93 | +## Byte Size and Time Duration Parsers + +The Wrangler library now includes built-in support for parsing and aggregating byte sizes and time durations. This feature allows you to easily work with data that includes size measurements (e.g., "1.5MB", "2GB") and time intervals (e.g., "500ms", "2.5h"). + +### Supported Units + +#### Byte Size Units +- B (Bytes) +- KB (Kilobytes) +- MB (Megabytes) +- GB (Gigabytes) +- TB (Terabytes) +- PB (Petabytes) + +#### Time Duration Units +- ns (Nanoseconds) +- us (Microseconds) +- ms (Milliseconds) +- s (Seconds) +- m (Minutes) +- h (Hours) +- d (Days) + +### Using the Aggregate Stats Directive + +The `aggregate-stats` directive allows you to aggregate byte sizes and time durations across rows. 
Here's the syntax: + +``` +aggregate-stats :size_column :time_column total_size_column total_time_column [output_size_unit] [output_time_unit] +``` + +Parameters: +- `:size_column` - Column containing byte sizes (e.g., "1.5MB", "2GB") +- `:time_column` - Column containing time durations (e.g., "500ms", "2.5h") +- `total_size_column` - Name of the output column for total size +- `total_time_column` - Name of the output column for total time +- `output_size_unit` - (Optional) Unit for the output size (default: "MB") +- `output_time_unit` - (Optional) Unit for the output time (default: "s") + +Example: +``` +# Input data: +# | data_size | response_time | +# |-----------|---------------| +# | 1.5MB | 500ms | +# | 2.5MB | 750ms | +# | 1MB | 250ms | + +# Directive: +aggregate-stats :data_size :response_time total_size total_time MB s + +# Output: +# | total_size | total_time | +# |------------|------------| +# | 5.0 | 1.5 | +``` + +The directive automatically handles mixed units in the input data, converting everything to a common base unit (bytes for sizes, nanoseconds for times) before aggregating and then converting to the requested output units. ## Contact @@ -216,3 +274,60 @@ Cask is a trademark of Cask Data, Inc. All rights reserved. Apache, Apache HBase, and HBase are trademarks of The Apache Software Foundation. Used with permission. No endorsement by The Apache Software Foundation is implied by the use of these marks. + +# Wrangler + +A data preparation tool for cleaning, transforming, and preparing data for analysis. 
+ +## Unit Parsers + +### Byte Size Parser +The byte size parser supports the following units: +- B (bytes) +- KB (kilobytes) +- MB (megabytes) +- GB (gigabytes) +- TB (terabytes) +- PB (petabytes) + +Example usage: +``` +1B // 1 byte +1KB // 1 kilobyte +1MB // 1 megabyte +1GB // 1 gigabyte +1TB // 1 terabyte +1PB // 1 petabyte +``` + +### Time Duration Parser +The time duration parser supports the following units: +- ns (nanoseconds) +- us (microseconds) +- ms (milliseconds) +- s (seconds) +- m (minutes) +- h (hours) +- d (days) + +Example usage: +``` +1ns // 1 nanosecond +1us // 1 microsecond +1ms // 1 millisecond +1s // 1 second +1m // 1 minute +1h // 1 hour +1d // 1 day +``` + +### Usage in Directives +Both byte size and time duration values can be used in directives for data transformation and aggregation: + +``` +// Aggregate byte sizes +aggregate-stats :column1 sum as total_size; + +// Aggregate time durations +aggregate-stats :column2 average as avg_duration; +``` diff --git a/clickhouse-flatfile-ingestion/README.md b/clickhouse-flatfile-ingestion/README.md new file mode 100644 index 000000000..0b6c65ce6 --- /dev/null +++ b/clickhouse-flatfile-ingestion/README.md @@ -0,0 +1,137 @@ +# ClickHouse Flat File Ingestion Tool + +A web-based application for bidirectional data ingestion between ClickHouse database and Flat File platform. + +## Features + +- Bidirectional data ingestion between ClickHouse and Flat Files +- JWT-based authentication +- Schema discovery and validation +- Progress tracking for large data transfers +- Support for various file formats (CSV, JSON, etc.) 
+- Configurable data mapping +- Error handling and logging + +## Technology Stack + +### Backend +- Spring Boot +- Spring Security with JWT +- ClickHouse JDBC Driver +- Apache Commons CSV +- Jackson for JSON processing + +### Frontend +- React +- Material-UI +- Axios +- React Router +- React Query + +## Prerequisites + +- Java 17 or higher +- Node.js 16 or higher +- ClickHouse server +- PostgreSQL (for user management) + +## Environment Variables + +Create a `.env` file in the backend directory with the following variables: + +```properties +# Database Configuration +DB_URL=jdbc:postgresql://localhost:5432/ingestion_db +DB_USERNAME=your_db_username +DB_PASSWORD=your_db_password + +# ClickHouse Configuration +CLICKHOUSE_HOST=your_clickhouse_host +CLICKHOUSE_PORT=8443 +CLICKHOUSE_DATABASE=your_database +CLICKHOUSE_USER=your_username +CLICKHOUSE_PASSWORD=your_password + +# JWT Configuration +JWT_SECRET=your_jwt_secret_key + +# File Upload Configuration +UPLOAD_DIR=./uploads +``` + +## Installation + +1. Clone the repository +2. Set up environment variables +3. Build and run the backend: + ```bash + cd backend + ./mvnw clean install + ./mvnw spring-boot:run + ``` +4. Build and run the frontend: + ```bash + cd frontend + npm install + npm start + ``` + +## Usage + +1. Access the application at `http://localhost:3000` +2. Log in with your credentials +3. Select source (ClickHouse or Flat File) +4. Configure connection parameters +5. Select tables and columns +6. Preview data +7. 
Start ingestion process + +## Security Considerations + +- All sensitive information is stored in environment variables +- JWT tokens expire after 24 hours +- Passwords are hashed using BCrypt +- SSL/TLS encryption for database connections +- Input validation and sanitization +- Rate limiting on API endpoints + +## API Documentation + +### Authentication +- POST /api/auth/login - Login endpoint +- POST /api/auth/refresh - Refresh token endpoint + +### Ingestion +- POST /api/ingestion/export - Export data from ClickHouse to file +- POST /api/ingestion/import - Import data from file to ClickHouse +- GET /api/ingestion/progress/{jobId} - Get ingestion progress +- GET /api/ingestion/schema - Get table schema +- GET /api/ingestion/preview - Get data preview + +## Error Handling + +The application includes comprehensive error handling for: +- Invalid credentials +- Connection failures +- Schema mismatches +- File format errors +- Data validation errors +- Network timeouts + +## Logging + +- Application logs are stored in `logs/application.log` +- Log levels can be configured in `application.properties` +- Structured logging format for better analysis + +## Contributing + +1. Fork the repository +2. Create a feature branch +3. Commit your changes +4. Push to the branch +5. Create a Pull Request + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. 
\ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/pom.xml b/clickhouse-flatfile-ingestion/backend/pom.xml new file mode 100644 index 000000000..4a120cb2c --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/pom.xml @@ -0,0 +1,100 @@ + + + 4.0.0 + + org.springframework.boot + spring-boot-starter-parent + 2.7.0 + + com.wrangler + clickhouse-flatfile-ingestion + 1.0-SNAPSHOT + clickhouse-flatfile-ingestion + Bidirectional ClickHouse & Flat File Data Ingestion Tool + + + 11 + 0.3.2 + 0.11.5 + 1.9.0 + 1.18.24 + + + + + + org.springframework.boot + spring-boot-starter-web + + + org.springframework.boot + spring-boot-starter-validation + + + + + com.clickhouse + clickhouse-jdbc + ${clickhouse-jdbc.version} + + + + + io.jsonwebtoken + jjwt-api + ${jjwt.version} + + + io.jsonwebtoken + jjwt-impl + ${jjwt.version} + runtime + + + io.jsonwebtoken + jjwt-jackson + ${jjwt.version} + runtime + + + + + org.apache.commons + commons-csv + ${commons-csv.version} + + + + + org.projectlombok + lombok + ${lombok.version} + provided + + + + + org.springframework.boot + spring-boot-starter-test + test + + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + + org.projectlombok + lombok + + + + + + + \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/ClickHouseFlatFileIngestionApplication.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/ClickHouseFlatFileIngestionApplication.java new file mode 100644 index 000000000..f2c3703af --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/ClickHouseFlatFileIngestionApplication.java @@ -0,0 +1,30 @@ +package com.ingestion; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.context.annotation.Bean; +import org.springframework.scheduling.annotation.EnableAsync; +import 
org.springframework.web.servlet.config.annotation.CorsRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +@SpringBootApplication +@EnableAsync +public class ClickHouseFlatFileIngestionApplication { + + public static void main(String[] args) { + SpringApplication.run(ClickHouseFlatFileIngestionApplication.class, args); + } + + @Bean + public WebMvcConfigurer corsConfigurer() { + return new WebMvcConfigurer() { + @Override + public void addCorsMappings(CorsRegistry registry) { + registry.addMapping("/**") + .allowedOrigins("*") + .allowedMethods("GET", "POST", "PUT", "DELETE", "OPTIONS") + .allowedHeaders("*"); + } + }; + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/IngestionApplication.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/IngestionApplication.java new file mode 100644 index 000000000..5384e838d --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/IngestionApplication.java @@ -0,0 +1,29 @@ +package com.ingestion; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.context.annotation.Bean; +import org.springframework.web.servlet.config.annotation.CorsRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +@SpringBootApplication +public class IngestionApplication { + + public static void main(String[] args) { + SpringApplication.run(IngestionApplication.class, args); + } + + @Bean + public WebMvcConfigurer corsConfigurer() { + return new WebMvcConfigurer() { + @Override + public void addCorsMappings(CorsRegistry registry) { + registry.addMapping("/**") + .allowedOrigins("http://localhost:3000") + .allowedMethods("GET", "POST", "PUT", "DELETE", "OPTIONS") + .allowedHeaders("*") + .allowCredentials(true); + } + }; + } +} \ No newline at end of file diff --git 
/**
 * Exception thrown when there are issues connecting to the database.
 *
 * <p>Unchecked on purpose: callers generally cannot recover from a lost
 * connection inline; the global exception handler translates it into an
 * HTTP 503 (Service Unavailable) response.
 */
public class DatabaseConnectionException extends RuntimeException {

    // Exceptions are Serializable; pin the id so the serialized form stays
    // stable across recompiles (otherwise the compiler generates one).
    private static final long serialVersionUID = 1L;

    /**
     * @param message human-readable description of the connection failure
     */
    public DatabaseConnectionException(String message) {
        super(message);
    }

    /**
     * @param message human-readable description of the connection failure
     * @param cause   the underlying driver/IO exception, preserved so the
     *                full stack trace is available in logs
     */
    public DatabaseConnectionException(String message, Throwable cause) {
        super(message, cause);
    }
}
+ */ +@ControllerAdvice +public class GlobalExceptionHandler extends ResponseEntityExceptionHandler { + + private static final Logger logger = LoggerFactory.getLogger(GlobalExceptionHandler.class); + + /** + * Handles all unhandled exceptions + */ + @ExceptionHandler(Exception.class) + public ResponseEntity handleAllExceptions(Exception ex, WebRequest request) { + logger.error("Unhandled exception occurred", ex); + + ErrorResponse errorResponse = new ErrorResponse( + HttpStatus.INTERNAL_SERVER_ERROR.value(), + "An unexpected error occurred", + ex.getMessage(), + LocalDateTime.now() + ); + + return new ResponseEntity<>(errorResponse, HttpStatus.INTERNAL_SERVER_ERROR); + } + + /** + * Handles validation exceptions + */ + @ExceptionHandler(IllegalArgumentException.class) + public ResponseEntity handleValidationExceptions(IllegalArgumentException ex, WebRequest request) { + logger.warn("Validation error occurred", ex); + + ErrorResponse errorResponse = new ErrorResponse( + HttpStatus.BAD_REQUEST.value(), + "Validation error", + ex.getMessage(), + LocalDateTime.now() + ); + + return new ResponseEntity<>(errorResponse, HttpStatus.BAD_REQUEST); + } + + /** + * Handles resource not found exceptions + */ + @ExceptionHandler(ResourceNotFoundException.class) + public ResponseEntity handleResourceNotFoundException(ResourceNotFoundException ex, WebRequest request) { + logger.warn("Resource not found", ex); + + ErrorResponse errorResponse = new ErrorResponse( + HttpStatus.NOT_FOUND.value(), + "Resource not found", + ex.getMessage(), + LocalDateTime.now() + ); + + return new ResponseEntity<>(errorResponse, HttpStatus.NOT_FOUND); + } + + /** + * Handles database connection exceptions + */ + @ExceptionHandler(DatabaseConnectionException.class) + public ResponseEntity handleDatabaseConnectionException(DatabaseConnectionException ex, WebRequest request) { + logger.error("Database connection error", ex); + + ErrorResponse errorResponse = new ErrorResponse( + 
HttpStatus.SERVICE_UNAVAILABLE.value(), + "Database connection error", + ex.getMessage(), + LocalDateTime.now() + ); + + return new ResponseEntity<>(errorResponse, HttpStatus.SERVICE_UNAVAILABLE); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/HealthCheckConfig.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/HealthCheckConfig.java new file mode 100644 index 000000000..1cb62bb9f --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/HealthCheckConfig.java @@ -0,0 +1,43 @@ +package com.ingestion.config; + +import org.springframework.boot.actuate.health.Health; +import org.springframework.boot.actuate.health.HealthIndicator; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.jdbc.core.JdbcTemplate; + +/** + * Configuration for health checks and monitoring. + */ +@Configuration +public class HealthCheckConfig { + + /** + * Creates a health indicator for the database connection. + */ + @Bean + public HealthIndicator dbHealthIndicator(JdbcTemplate jdbcTemplate) { + return () -> { + try { + jdbcTemplate.queryForObject("SELECT 1", Integer.class); + return Health.up().withDetail("database", "ClickHouse").build(); + } catch (Exception e) { + return Health.down() + .withDetail("database", "ClickHouse") + .withException(e) + .build(); + } + }; + } + + /** + * Creates a health indicator for the application. 
+ */ + @Bean + public HealthIndicator applicationHealthIndicator() { + return () -> Health.up() + .withDetail("application", "ClickHouse Flat File Ingestion") + .withDetail("status", "Running") + .build(); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/JwtAuthenticationFilter.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/JwtAuthenticationFilter.java new file mode 100644 index 000000000..3dd75539c --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/JwtAuthenticationFilter.java @@ -0,0 +1,64 @@ +package com.ingestion.config; + +import jakarta.servlet.FilterChain; +import jakarta.servlet.ServletException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import org.springframework.security.authentication.UsernamePasswordAuthenticationToken; +import org.springframework.security.core.context.SecurityContextHolder; +import org.springframework.security.core.userdetails.UserDetails; +import org.springframework.security.core.userdetails.UserDetailsService; +import org.springframework.security.web.authentication.WebAuthenticationDetailsSource; +import org.springframework.stereotype.Component; +import org.springframework.web.filter.OncePerRequestFilter; + +import java.io.IOException; + +/** + * JWT Authentication Filter that validates JWT tokens in incoming requests. 
+ */ +@Component +public class JwtAuthenticationFilter extends OncePerRequestFilter { + + private final JwtService jwtService; + private final UserDetailsService userDetailsService; + + public JwtAuthenticationFilter(JwtService jwtService, UserDetailsService userDetailsService) { + this.jwtService = jwtService; + this.userDetailsService = userDetailsService; + } + + @Override + protected void doFilterInternal( + HttpServletRequest request, + HttpServletResponse response, + FilterChain filterChain + ) throws ServletException, IOException { + final String authHeader = request.getHeader("Authorization"); + final String jwt; + final String username; + + if (authHeader == null || !authHeader.startsWith("Bearer ")) { + filterChain.doFilter(request, response); + return; + } + + jwt = authHeader.substring(7); + username = jwtService.extractUsername(jwt); + + if (username != null && SecurityContextHolder.getContext().getAuthentication() == null) { + UserDetails userDetails = this.userDetailsService.loadUserByUsername(username); + + if (jwtService.isTokenValid(jwt, userDetails)) { + UsernamePasswordAuthenticationToken authToken = new UsernamePasswordAuthenticationToken( + userDetails, + null, + userDetails.getAuthorities() + ); + authToken.setDetails(new WebAuthenticationDetailsSource().buildDetails(request)); + SecurityContextHolder.getContext().setAuthentication(authToken); + } + } + filterChain.doFilter(request, response); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/JwtService.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/JwtService.java new file mode 100644 index 000000000..3f93ad717 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/JwtService.java @@ -0,0 +1,124 @@ +package com.ingestion.config; + +import io.jsonwebtoken.Claims; +import io.jsonwebtoken.Jwts; +import io.jsonwebtoken.SignatureAlgorithm; +import 
io.jsonwebtoken.io.Decoders; +import io.jsonwebtoken.security.Keys; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.security.core.userdetails.UserDetails; +import org.springframework.stereotype.Service; + +import java.security.Key; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import java.util.function.Function; + +/** + * Service for JWT token operations including generation and validation. + */ +@Service +public class JwtService { + + @Value("${jwt.secret}") + private String secretKey; + + @Value("${jwt.expiration}") + private long jwtExpiration; + + @Value("${jwt.refresh-token.expiration}") + private long refreshExpiration; + + /** + * Extracts the username from a JWT token. + */ + public String extractUsername(String token) { + return extractClaim(token, Claims::getSubject); + } + + /** + * Extracts a specific claim from a JWT token. + */ + public T extractClaim(String token, Function claimsResolver) { + final Claims claims = extractAllClaims(token); + return claimsResolver.apply(claims); + } + + /** + * Generates a JWT token for a user. + */ + public String generateToken(UserDetails userDetails) { + return generateToken(new HashMap<>(), userDetails); + } + + /** + * Generates a JWT token with extra claims for a user. + */ + public String generateToken(Map extraClaims, UserDetails userDetails) { + return buildToken(extraClaims, userDetails, jwtExpiration); + } + + /** + * Generates a refresh token for a user. + */ + public String generateRefreshToken(UserDetails userDetails) { + return buildToken(new HashMap<>(), userDetails, refreshExpiration); + } + + /** + * Builds a JWT token with the specified claims and expiration. 
+ */ + private String buildToken(Map extraClaims, UserDetails userDetails, long expiration) { + return Jwts + .builder() + .setClaims(extraClaims) + .setSubject(userDetails.getUsername()) + .setIssuedAt(new Date(System.currentTimeMillis())) + .setExpiration(new Date(System.currentTimeMillis() + expiration)) + .signWith(getSignInKey(), SignatureAlgorithm.HS256) + .compact(); + } + + /** + * Validates a JWT token for a user. + */ + public boolean isTokenValid(String token, UserDetails userDetails) { + final String username = extractUsername(token); + return (username.equals(userDetails.getUsername())) && !isTokenExpired(token); + } + + /** + * Checks if a token is expired. + */ + private boolean isTokenExpired(String token) { + return extractExpiration(token).before(new Date()); + } + + /** + * Extracts the expiration date from a token. + */ + private Date extractExpiration(String token) { + return extractClaim(token, Claims::getExpiration); + } + + /** + * Extracts all claims from a token. + */ + private Claims extractAllClaims(String token) { + return Jwts + .parserBuilder() + .setSigningKey(getSignInKey()) + .build() + .parseClaimsJws(token) + .getBody(); + } + + /** + * Gets the signing key for JWT operations. 
+ */ + private Key getSignInKey() { + byte[] keyBytes = Decoders.BASE64.decode(secretKey); + return Keys.hmacShaKeyFor(keyBytes); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/MetricsConfig.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/MetricsConfig.java new file mode 100644 index 000000000..180f93b74 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/MetricsConfig.java @@ -0,0 +1,21 @@ +package com.ingestion.config; + +import io.micrometer.core.aop.TimedAspect; +import io.micrometer.core.instrument.MeterRegistry; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * Configuration for application metrics and monitoring. + */ +@Configuration +public class MetricsConfig { + + /** + * Creates a TimedAspect bean for method timing. + */ + @Bean + public TimedAspect timedAspect(MeterRegistry registry) { + return new TimedAspect(registry); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/ResourceNotFoundException.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/ResourceNotFoundException.java new file mode 100644 index 000000000..eab29796b --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/config/ResourceNotFoundException.java @@ -0,0 +1,15 @@ +package com.ingestion.config; + +/** + * Exception thrown when a requested resource is not found. 
/**
 * Exception thrown when a requested resource is not found.
 *
 * <p>Unchecked; the global exception handler maps it to an HTTP 404
 * (Not Found) response.
 */
public class ResourceNotFoundException extends RuntimeException {

    // Exceptions are Serializable; pin the id so the serialized form stays
    // stable across recompiles (otherwise the compiler generates one).
    private static final long serialVersionUID = 1L;

    /**
     * @param message description identifying the missing resource
     */
    public ResourceNotFoundException(String message) {
        super(message);
    }

    /**
     * @param message description identifying the missing resource
     * @param cause   the underlying lookup failure, preserved for logging
     */
    public ResourceNotFoundException(String message, Throwable cause) {
        super(message, cause);
    }
}
cors.configurationSource(corsConfigurationSource())) + .authorizeHttpRequests(auth -> auth + .requestMatchers("/api/auth/**").permitAll() + .requestMatchers("/api/public/**").permitAll() + .requestMatchers("/swagger-ui/**", "/v3/api-docs/**").permitAll() + .requestMatchers("/api/**").authenticated() + .anyRequest().authenticated() + ) + .sessionManagement(session -> session + .sessionCreationPolicy(SessionCreationPolicy.STATELESS) + ) + .addFilterBefore(jwtAuthenticationFilter, UsernamePasswordAuthenticationFilter.class); + + return http.build(); + } + + @Bean + public CorsConfigurationSource corsConfigurationSource() { + CorsConfiguration configuration = new CorsConfiguration(); + configuration.setAllowedOrigins(Arrays.asList("*")); // In production, specify exact origins + configuration.setAllowedMethods(Arrays.asList("GET", "POST", "PUT", "DELETE", "OPTIONS")); + configuration.setAllowedHeaders(Arrays.asList("Authorization", "Content-Type", "X-Requested-With")); + configuration.setExposedHeaders(Arrays.asList("Authorization")); + configuration.setAllowCredentials(true); + configuration.setMaxAge(3600L); + + UrlBasedCorsConfigurationSource source = new UrlBasedCorsConfigurationSource(); + source.registerCorsConfiguration("/**", configuration); + return source; + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/controller/IngestionStatusController.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/controller/IngestionStatusController.java new file mode 100644 index 000000000..8bf0b46bf --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/controller/IngestionStatusController.java @@ -0,0 +1,115 @@ +package com.ingestion.controller; + +import com.ingestion.model.IngestionStatus; +import com.ingestion.model.IngestionStatusEnum; +import com.ingestion.service.IngestionStatusService; +import org.springframework.beans.factory.annotation.Autowired; +import 
org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; + +import java.util.List; + +@RestController +@RequestMapping("/api/ingestion-status") +public class IngestionStatusController { + + @Autowired + private IngestionStatusService ingestionStatusService; + + @PostMapping + public ResponseEntity createStatus( + @RequestParam String tableName, + @RequestParam String fileName) { + IngestionStatus status = ingestionStatusService.createIngestionStatus(tableName, fileName); + return ResponseEntity.ok(status); + } + + @PutMapping("/{id}/status") + public ResponseEntity updateStatus( + @PathVariable String id, + @RequestParam IngestionStatusEnum status) { + IngestionStatus updatedStatus = ingestionStatusService.updateStatus(id, status); + if (updatedStatus != null) { + return ResponseEntity.ok(updatedStatus); + } + return ResponseEntity.notFound().build(); + } + + @PutMapping("/{id}/progress") + public ResponseEntity updateProgress( + @PathVariable String id, + @RequestParam long processedRows, + @RequestParam long failedRows) { + IngestionStatus updatedStatus = ingestionStatusService.updateProgress(id, processedRows, failedRows); + if (updatedStatus != null) { + return ResponseEntity.ok(updatedStatus); + } + return ResponseEntity.notFound().build(); + } + + @PutMapping("/{id}/total-rows") + public ResponseEntity updateTotalRows( + @PathVariable String id, + @RequestParam long totalRows) { + IngestionStatus updatedStatus = ingestionStatusService.updateTotalRows(id, totalRows); + if (updatedStatus != null) { + return ResponseEntity.ok(updatedStatus); + } + return ResponseEntity.notFound().build(); + } + + @PutMapping("/{id}/error") + public ResponseEntity updateErrorMessage( + @PathVariable String id, + @RequestParam String errorMessage) { + IngestionStatus updatedStatus = ingestionStatusService.updateErrorMessage(id, errorMessage); + if (updatedStatus != null) { + return ResponseEntity.ok(updatedStatus); + } + return 
ResponseEntity.notFound().build(); + } + + @PutMapping("/{id}/retry") + public ResponseEntity incrementRetryCount( + @PathVariable String id) { + IngestionStatus updatedStatus = ingestionStatusService.incrementRetryCount(id); + if (updatedStatus != null) { + return ResponseEntity.ok(updatedStatus); + } + return ResponseEntity.notFound().build(); + } + + @GetMapping("/table/{tableName}") + public ResponseEntity> getStatusByTableName( + @PathVariable String tableName) { + List statuses = ingestionStatusService.getStatusByTableName(tableName); + return ResponseEntity.ok(statuses); + } + + @GetMapping("/file/{fileName}") + public ResponseEntity> getStatusByFileName( + @PathVariable String fileName) { + List statuses = ingestionStatusService.getStatusByFileName(fileName); + return ResponseEntity.ok(statuses); + } + + @GetMapping("/incomplete") + public ResponseEntity> getIncompleteIngestions() { + List statuses = ingestionStatusService.getIncompleteIngestions(); + return ResponseEntity.ok(statuses); + } + + @GetMapping("/status/{status}") + public ResponseEntity> getStatusByStatus( + @PathVariable String status) { + List statuses = ingestionStatusService.getStatusByStatus(status); + return ResponseEntity.ok(statuses); + } + + @GetMapping("/{id}") + public ResponseEntity getStatusById(@PathVariable String id) { + return ingestionStatusService.getStatusById(id) + .map(ResponseEntity::ok) + .orElse(ResponseEntity.notFound().build()); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/controller/TableMappingController.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/controller/TableMappingController.java new file mode 100644 index 000000000..71a738439 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/controller/TableMappingController.java @@ -0,0 +1,55 @@ +package com.ingestion.controller; + +import com.ingestion.dto.TableMappingRequest; +import 
com.ingestion.entity.TableMapping;
import com.ingestion.service.TableMappingService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;

// jakarta (not javax) for consistency with the DTOs and entities in this
// codebase; mixed namespaces mean one set of annotations is silently ignored.
import jakarta.validation.Valid;
import java.util.List;

/**
 * CRUD endpoints for table mappings, addressed by target table name.
 */
@RestController
@RequestMapping("/api/table-mappings")
public class TableMappingController {

    private final TableMappingService tableMappingService;

    @Autowired
    public TableMappingController(TableMappingService tableMappingService) {
        this.tableMappingService = tableMappingService;
    }

    /** Creates a mapping from a validated request body. */
    @PostMapping
    public ResponseEntity<TableMapping> createTableMapping(@Valid @RequestBody TableMappingRequest request) {
        return ResponseEntity.ok(tableMappingService.createTableMapping(request));
    }

    /** Fetches the mapping for one table. */
    @GetMapping("/{tableName}")
    public ResponseEntity<TableMapping> getTableMapping(@PathVariable String tableName) {
        return ResponseEntity.ok(tableMappingService.getTableMapping(tableName));
    }

    /** Lists every configured mapping. */
    @GetMapping
    public ResponseEntity<List<TableMapping>> getAllTableMappings() {
        return ResponseEntity.ok(tableMappingService.getAllTableMappings());
    }

    /** Replaces the mapping for the given table. */
    @PutMapping("/{tableName}")
    public ResponseEntity<TableMapping> updateTableMapping(
            @PathVariable String tableName,
            @Valid @RequestBody TableMappingRequest request) {
        return ResponseEntity.ok(tableMappingService.updateTableMapping(tableName, request));
    }

    /** Deletes the mapping; keeps the existing 200-with-empty-body contract. */
    @DeleteMapping("/{tableName}")
    public ResponseEntity<Void> deleteTableMapping(@PathVariable String tableName) {
        tableMappingService.deleteTableMapping(tableName);
        return ResponseEntity.ok().build();
    }
}

// --- new file: clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/ConnectionRequest.java — new file
mode 100644 index 000000000..b004603d3 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/ConnectionRequest.java @@ -0,0 +1,31 @@ +package com.ingestion.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import jakarta.validation.constraints.NotBlank; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class ConnectionRequest { + + @NotBlank + private String host; + + @NotBlank + private String port; + + @NotBlank + private String database; + + @NotBlank + private String username; + + @NotBlank + private String password; + + private String connectionType; // CLICKHOUSE or FLATFILE +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/ErrorResponse.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/ErrorResponse.java new file mode 100644 index 000000000..ce0670e41 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/ErrorResponse.java @@ -0,0 +1,21 @@ +package com.ingestion.dto; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.time.LocalDateTime; + +/** + * Standardized error response for the API. 
+ */ +@Data +@NoArgsConstructor +@AllArgsConstructor +public class ErrorResponse { + + private int status; + private String error; + private String message; + private LocalDateTime timestamp; +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/FileUploadRequest.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/FileUploadRequest.java new file mode 100644 index 000000000..d8f7f31dc --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/FileUploadRequest.java @@ -0,0 +1,29 @@ +package com.ingestion.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import jakarta.validation.constraints.NotBlank; +import org.springframework.web.multipart.MultipartFile; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class FileUploadRequest { + + @NotBlank(message = "File name is required") + private String fileName; + + @NotBlank(message = "Table name is required") + private String tableName; + + private String delimiter; + + private Boolean hasHeader; + + private String encoding; + + private MultipartFile file; +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/FlatFileConfig.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/FlatFileConfig.java new file mode 100644 index 000000000..192739cd7 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/FlatFileConfig.java @@ -0,0 +1,37 @@ +package com.ingestion.dto; + +import lombok.Data; +import lombok.NoArgsConstructor; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotNull; + +@Data +@NoArgsConstructor +public class FlatFileConfig { + + @NotBlank(message = "File name is required") + private String fileName; + + @NotBlank(message = "File path is required") + private 
String filePath; + + @NotBlank(message = "File type is required") + private String fileType; + + @NotBlank + private String delimiter; + + @NotBlank + private String encoding; + + @NotNull + private Boolean hasHeader; + + @NotNull + private Boolean skipEmptyLines; + + private String dateFormat; + private String timeFormat; + private String timestampFormat; + private String timezone; +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/IngestionRequest.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/IngestionRequest.java new file mode 100644 index 000000000..43492a8a1 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/IngestionRequest.java @@ -0,0 +1,25 @@ +package com.ingestion.dto; + +import lombok.Data; +import lombok.NoArgsConstructor; +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotNull; + +@Data +@NoArgsConstructor +public class IngestionRequest { + + @NotBlank(message = "Connection ID is required") + private String connectionId; + + @NotBlank(message = "Table name is required") + private String tableName; + + @NotBlank(message = "File name is required") + private String fileName; + + @NotNull(message = "File configuration is required") + @Valid + private FlatFileConfig fileConfig; +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/IngestionStatusDTO.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/IngestionStatusDTO.java new file mode 100644 index 000000000..b9148e717 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/IngestionStatusDTO.java @@ -0,0 +1,21 @@ +package com.ingestion.dto; + +import com.ingestion.entity.IngestionStatusEnum; +import lombok.Data; + +import java.time.LocalDateTime; + +@Data +public class 
IngestionStatusDTO { + private String id; + private String tableName; + private String fileName; + private IngestionStatusEnum status; + private Integer progress; + private Long totalRows; + private String errorMessage; + private Integer retryCount; + private LocalDateTime createdAt; + private LocalDateTime updatedAt; + private LocalDateTime completedAt; +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/IngestionStatusRequest.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/IngestionStatusRequest.java new file mode 100644 index 000000000..e769909f9 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/IngestionStatusRequest.java @@ -0,0 +1,29 @@ +package com.ingestion.dto; + +import com.ingestion.entity.IngestionStatusEnum; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotNull; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class IngestionStatusRequest { + @NotBlank + private String tableName; + + @NotBlank + private String fileName; + + @NotNull + private IngestionStatusEnum status; + + private Integer progress; + private Long totalRows; + private String errorMessage; + private Integer retryCount; +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/TableMappingRequest.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/TableMappingRequest.java new file mode 100644 index 000000000..18dc00922 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/dto/TableMappingRequest.java @@ -0,0 +1,38 @@ +package com.ingestion.dto; + +import lombok.Data; +import lombok.NoArgsConstructor; +import jakarta.validation.Valid; +import 
jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotEmpty; +import java.util.List; + +@Data +@NoArgsConstructor +public class TableMappingRequest { + + @NotBlank(message = "Table name is required") + private String tableName; + + @NotBlank(message = "Connection ID is required") + private String connectionId; + + @NotEmpty(message = "At least one column mapping is required") + @Valid + private List columnMappings; + + @Data + @NoArgsConstructor + public static class ColumnMapping { + @NotBlank(message = "Source column name is required") + private String sourceColumn; + + @NotBlank(message = "Target column name is required") + private String targetColumn; + + private String dataType; + private String transformation; + private Boolean isNullable; + private String defaultValue; + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/entity/IngestionStatus.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/entity/IngestionStatus.java new file mode 100644 index 000000000..5b927366d --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/entity/IngestionStatus.java @@ -0,0 +1,73 @@ +package com.ingestion.entity; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import jakarta.persistence.*; +import java.time.LocalDateTime; + +@Entity +@Table(name = "ingestion_status") +@Data +@NoArgsConstructor +@AllArgsConstructor +@Builder +public class IngestionStatus { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Column(name = "file_name", nullable = false) + private String fileName; + + @Column(name = "table_name", nullable = false) + private String tableName; + + @Column(name = "status", nullable = false) + @Enumerated(EnumType.STRING) + private IngestionStatusType status; + + @Column(name = "records_processed") + private Long 
recordsProcessed; + + @Column(name = "records_failed") + private Long recordsFailed; + + @Column(name = "start_time") + private LocalDateTime startTime; + + @Column(name = "end_time") + private LocalDateTime endTime; + + @Column(name = "error_message") + private String errorMessage; + + @Column(name = "created_at") + private LocalDateTime createdAt; + + @Column(name = "updated_at") + private LocalDateTime updatedAt; + + @PrePersist + protected void onCreate() { + createdAt = LocalDateTime.now(); + updatedAt = LocalDateTime.now(); + if (status == null) { + status = IngestionStatusType.PENDING; + } + if (recordsProcessed == null) { + recordsProcessed = 0L; + } + if (recordsFailed == null) { + recordsFailed = 0L; + } + } + + @PreUpdate + protected void onUpdate() { + updatedAt = LocalDateTime.now(); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/entity/IngestionStatusEnum.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/entity/IngestionStatusEnum.java new file mode 100644 index 000000000..4be6a84e3 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/entity/IngestionStatusEnum.java @@ -0,0 +1,8 @@ +package com.ingestion.entity; + +public enum IngestionStatusEnum { + IN_PROGRESS, + SUCCESS, + FAILED, + CANCELLED +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/entity/TableMapping.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/entity/TableMapping.java new file mode 100644 index 000000000..e8c02474b --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/entity/TableMapping.java @@ -0,0 +1,50 @@ +package com.ingestion.entity; + +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.AllArgsConstructor; + +import jakarta.persistence.*; +import java.time.LocalDateTime; +import java.util.Map; + +@Entity +@Table(name = 
"table_mappings") +@Data +@NoArgsConstructor +@AllArgsConstructor +public class TableMapping { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Column(name = "source_table", nullable = false) + private String sourceTable; + + @Column(name = "target_table", nullable = false) + private String targetTable; + + @ElementCollection + @CollectionTable(name = "column_mappings", joinColumns = @JoinColumn(name = "table_mapping_id")) + @MapKeyColumn(name = "source_column") + @Column(name = "target_column") + private Map columnMappings; + + @Column(name = "created_at") + private LocalDateTime createdAt; + + @Column(name = "updated_at") + private LocalDateTime updatedAt; + + @PrePersist + protected void onCreate() { + createdAt = LocalDateTime.now(); + updatedAt = LocalDateTime.now(); + } + + @PreUpdate + protected void onUpdate() { + updatedAt = LocalDateTime.now(); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/mapper/IngestionStatusMapper.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/mapper/IngestionStatusMapper.java new file mode 100644 index 000000000..90a250b4a --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/mapper/IngestionStatusMapper.java @@ -0,0 +1,51 @@ +package com.ingestion.mapper; + +import com.ingestion.dto.IngestionStatusDTO; +import com.ingestion.entity.IngestionStatus; +import org.springframework.stereotype.Component; + +@Component +public class IngestionStatusMapper { + + public IngestionStatusDTO toDTO(IngestionStatus entity) { + if (entity == null) { + return null; + } + + IngestionStatusDTO dto = new IngestionStatusDTO(); + dto.setId(entity.getId()); + dto.setTableName(entity.getTableName()); + dto.setFileName(entity.getFileName()); + dto.setStatus(entity.getStatus()); + dto.setProgress(entity.getProgress()); + dto.setTotalRows(entity.getTotalRows()); + 
dto.setErrorMessage(entity.getErrorMessage()); + dto.setRetryCount(entity.getRetryCount()); + dto.setCreatedAt(entity.getCreatedAt()); + dto.setUpdatedAt(entity.getUpdatedAt()); + dto.setCompletedAt(entity.getCompletedAt()); + + return dto; + } + + public IngestionStatus toEntity(IngestionStatusDTO dto) { + if (dto == null) { + return null; + } + + IngestionStatus entity = new IngestionStatus(); + entity.setId(dto.getId()); + entity.setTableName(dto.getTableName()); + entity.setFileName(dto.getFileName()); + entity.setStatus(dto.getStatus()); + entity.setProgress(dto.getProgress()); + entity.setTotalRows(dto.getTotalRows()); + entity.setErrorMessage(dto.getErrorMessage()); + entity.setRetryCount(dto.getRetryCount()); + entity.setCreatedAt(dto.getCreatedAt()); + entity.setUpdatedAt(dto.getUpdatedAt()); + entity.setCompletedAt(dto.getCompletedAt()); + + return entity; + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/model/ClickHouseConnection.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/model/ClickHouseConnection.java new file mode 100644 index 000000000..38cd42e56 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/model/ClickHouseConnection.java @@ -0,0 +1,35 @@ +package com.ingestion.model; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.Builder; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotNull; + +@Data +@NoArgsConstructor +@AllArgsConstructor +@Builder +public class ClickHouseConnection { + + @NotBlank + private String host; + + @NotNull + private Integer port; + + @NotBlank + private String database; + + @NotBlank + private String username; + + @NotBlank + private String password; + + @NotBlank(message = "JWT token is required") + private String jwtToken; + + private boolean useHttps = false; +} \ No newline at end of file diff 
// (diff metadata) new file: .../model/FlatFileConfig.java
package com.ingestion.model;

import lombok.Data;

/**
 * Parsing options for a delimited flat file (model variant; the validated
 * DTO variant lives in com.ingestion.dto).
 */
@Data
public class FlatFileConfig {
    private char delimiter;        // field separator, e.g. ',' or '\t'
    private char quoteCharacter;
    private char escapeCharacter;
    private boolean hasHeader;     // first row contains column names
    private boolean skipEmptyLines;
    private boolean trimValues;    // strip surrounding whitespace per value
}

// --- new file: clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/model/IngestionRequest.java ---
package com.ingestion.model;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

// jakarta (not javax) for consistency with every other validated class in
// this codebase; with mixed namespaces one set of annotations is ignored.
import jakarta.validation.Valid;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;

/** Request payload describing a flat-file-to-ClickHouse ingestion job. */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class IngestionRequest {

    @NotBlank(message = "Table name is required")
    private String tableName;

    @NotNull(message = "ClickHouse connection details are required")
    @Valid
    private ClickHouseConnection connection;

    @NotNull(message = "Flat file configuration is required")
    @Valid
    private FlatFileConfig fileConfig;

    // NOTE(review): these tuning knobs are Strings to preserve the existing
    // getter/setter signatures; switching to int/long would be a breaking change.
    private String batchSize = "10000";

    private String maxRetries = "3";

    private String retryInterval = "5000"; // milliseconds

}

// --- new file: clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/model/IngestionResponse.java
// (diff metadata) new file: .../model/IngestionResponse.java
package com.ingestion.model;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.time.LocalDateTime;

/** Summary of an ingestion job returned to API clients. */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class IngestionResponse {

    private String status; // SUCCESS, FAILED, IN_PROGRESS

    private String message;

    private LocalDateTime startTime;

    private LocalDateTime endTime;

    private long totalRows;

    private long processedRows;

    private long failedRows;

    private String errorDetails;

    private String jobId;

    @Builder.Default
    private boolean isComplete = false;
}

// --- new file: clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/model/IngestionStatus.java ---
package com.ingestion.model;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.time.LocalDateTime;

/**
 * In-memory job status snapshot. Distinct from the JPA entity of the same
 * name in com.ingestion.entity — NOTE(review): confirm both are needed.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class IngestionStatus {

    private String jobId;

    private String status; // SUCCESS, FAILED, IN_PROGRESS, CANCELLED

    private LocalDateTime startTime;

    private LocalDateTime lastUpdatedTime;

    private long totalRows;

    private long processedRows;

    private long failedRows;

    private String errorMessage;

    private String tableName;

    private String fileName;

    @Builder.Default
    private boolean isComplete = false;

    @Builder.Default
    private int retryCount = 0;
}

// --- new file: clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/model/IngestionStatusEnum.java ---
package com.ingestion.model;

/** Job states with display-friendly labels (model layer). */
public enum IngestionStatusEnum {

    SUCCESS("Success"),
    FAILED("Failed"),
    IN_PROGRESS("In Progress"),
    CANCELLED("Cancelled");

    private final String displayName;

    IngestionStatusEnum(String displayName) {
        this.displayName = displayName;
    }

    public String getDisplayName() {
        return displayName;
    }
}

// --- new file: clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/repository/IngestionStatusRepository.java ---
package com.ingestion.repository;

import com.ingestion.entity.IngestionStatus;
import com.ingestion.entity.IngestionStatusEnum;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;

import java.util.List;

/**
 * Spring Data repository for the ingestion_status table.
 * Generic parameters restored: the entity's @Id is a Long.
 */
@Repository
public interface IngestionStatusRepository extends JpaRepository<IngestionStatus, Long> {

    List<IngestionStatus> findByTableName(String tableName);

    List<IngestionStatus> findByFileName(String fileName);

    // NOTE(review): the entity declares its status field as IngestionStatusType,
    // not IngestionStatusEnum — confirm which enum is canonical, otherwise this
    // derived query will not type-check against the entity.
    List<IngestionStatus> findByStatus(IngestionStatusEnum status);

    // NOTE(review): the entity shown has endTime, not completedAt; Spring Data
    // rejects derived queries on missing properties at startup. Likely should
    // be findByEndTimeIsNull() — confirm against the final entity.
    List<IngestionStatus> findByCompletedAtIsNull();
}

// --- new file: clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/repository/TableMappingRepository.java
b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/repository/TableMappingRepository.java new file mode 100644 index 000000000..98d92ea97 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/repository/TableMappingRepository.java @@ -0,0 +1,15 @@ +package com.ingestion.repository; + +import com.ingestion.entity.TableMapping; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.stereotype.Repository; + +import java.util.Optional; + +@Repository +public interface TableMappingRepository extends JpaRepository { + + Optional findByTableName(String tableName); + + boolean existsByTableName(String tableName); +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/ClickHouseConnectionService.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/ClickHouseConnectionService.java new file mode 100644 index 000000000..bf44c0a38 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/ClickHouseConnectionService.java @@ -0,0 +1,274 @@ +package com.ingestion.service; + +import com.ingestion.dto.ConnectionRequest; +import com.ingestion.model.ClickHouseConnection; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; +import ru.yandex.clickhouse.ClickHouseDataSource; +import ru.yandex.clickhouse.settings.ClickHouseProperties; + +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +@Service +@Slf4j +public class ClickHouseConnectionService { + + private final Map connectionPool = new ConcurrentHashMap<>(); + + /** + * Creates a connection to ClickHouse database + * + * @param connectionRequest Connection details + * @return 
Connection ID + */ + public String createConnection(ConnectionRequest connectionRequest) { + String connectionId = generateConnectionId(connectionRequest); + + if (connectionPool.containsKey(connectionId)) { + log.info("Connection {} already exists", connectionId); + return connectionId; + } + + try { + ClickHouseProperties properties = new ClickHouseProperties(); + properties.setUser(connectionRequest.getUsername()); + properties.setPassword(connectionRequest.getPassword()); + + String url = String.format("jdbc:clickhouse://%s:%s/%s", + connectionRequest.getHost(), + connectionRequest.getPort(), + connectionRequest.getDatabase()); + + ClickHouseDataSource dataSource = new ClickHouseDataSource(url, properties); + + // Test connection + try (Connection connection = dataSource.getConnection()) { + log.info("Successfully connected to ClickHouse database: {}", connectionRequest.getDatabase()); + } + + connectionPool.put(connectionId, dataSource); + return connectionId; + } catch (SQLException e) { + log.error("Failed to create connection to ClickHouse: {}", e.getMessage()); + throw new RuntimeException("Failed to create connection to ClickHouse: " + e.getMessage()); + } + } + + /** + * Tests a connection to ClickHouse database + * + * @param connectionRequest Connection details + * @return true if connection is successful, false otherwise + */ + public boolean testConnection(ConnectionRequest connectionRequest) { + try { + ClickHouseProperties properties = new ClickHouseProperties(); + properties.setUser(connectionRequest.getUsername()); + properties.setPassword(connectionRequest.getPassword()); + + String url = String.format("jdbc:clickhouse://%s:%s/%s", + connectionRequest.getHost(), + connectionRequest.getPort(), + connectionRequest.getDatabase()); + + ClickHouseDataSource dataSource = new ClickHouseDataSource(url, properties); + + try (Connection connection = dataSource.getConnection()) { + log.info("Successfully tested connection to ClickHouse database: {}", 
connectionRequest.getDatabase()); + return true; + } + } catch (SQLException e) { + log.error("Failed to test connection to ClickHouse: {}", e.getMessage()); + return false; + } + } + + /** + * Gets a connection from the pool + * + * @param connectionId Connection ID + * @return Connection + */ + public Connection getConnection(String connectionId) { + ClickHouseDataSource dataSource = connectionPool.get(connectionId); + if (dataSource == null) { + throw new RuntimeException("Connection not found: " + connectionId); + } + + try { + return dataSource.getConnection(); + } catch (SQLException e) { + log.error("Failed to get connection: {}", e.getMessage()); + throw new RuntimeException("Failed to get connection: " + e.getMessage()); + } + } + + /** + * Gets all tables from a database + * + * @param connectionId Connection ID + * @return List of table names + */ + public List getTables(String connectionId) { + List tables = new ArrayList<>(); + + try (Connection connection = getConnection(connectionId); + ResultSet resultSet = connection.getMetaData().getTables(null, null, "%", new String[]{"TABLE"})) { + + while (resultSet.next()) { + tables.add(resultSet.getString("TABLE_NAME")); + } + + return tables; + } catch (SQLException e) { + log.error("Failed to get tables: {}", e.getMessage()); + throw new RuntimeException("Failed to get tables: " + e.getMessage()); + } + } + + /** + * Gets the schema of a table + * + * @param connectionId Connection ID + * @param tableName Table name + * @return Map of column names to column types + */ + public Map getTableSchema(String connectionId, String tableName) { + Map schema = new HashMap<>(); + + try (Connection connection = getConnection(connectionId); + ResultSet resultSet = connection.getMetaData().getColumns(null, null, tableName, "%")) { + + while (resultSet.next()) { + String columnName = resultSet.getString("COLUMN_NAME"); + String columnType = resultSet.getString("TYPE_NAME"); + schema.put(columnName, columnType); + } + + 
return schema; + } catch (SQLException e) { + log.error("Failed to get table schema: {}", e.getMessage()); + throw new RuntimeException("Failed to get table schema: " + e.getMessage()); + } + } + + /** + * Executes a query and returns the result + * + * @param connectionId Connection ID + * @param query SQL query + * @return ResultSet + */ + public ResultSet executeQuery(String connectionId, String query) { + try { + Connection connection = getConnection(connectionId); + return connection.createStatement().executeQuery(query); + } catch (SQLException e) { + log.error("Failed to execute query: {}", e.getMessage()); + throw new RuntimeException("Failed to execute query: " + e.getMessage()); + } + } + + /** + * Executes a query with parameters and returns the result + * + * @param connectionId Connection ID + * @param query SQL query with parameters + * @param parameters Query parameters + * @return ResultSet + */ + public ResultSet executeQuery(String connectionId, String query, Object... 
parameters) { + try { + Connection connection = getConnection(connectionId); + java.sql.PreparedStatement preparedStatement = connection.prepareStatement(query); + + for (int i = 0; i < parameters.length; i++) { + preparedStatement.setObject(i + 1, parameters[i]); + } + + return preparedStatement.executeQuery(); + } catch (SQLException e) { + log.error("Failed to execute query with parameters: {}", e.getMessage()); + throw new RuntimeException("Failed to execute query with parameters: " + e.getMessage()); + } + } + + /** + * Executes a batch insert + * + * @param connectionId Connection ID + * @param tableName Table name + * @param columns Column names + * @param values List of value arrays + * @return Number of rows affected + */ + public int executeBatchInsert(String connectionId, String tableName, List columns, List values) { + if (values.isEmpty()) { + return 0; + } + + try { + Connection connection = getConnection(connectionId); + connection.setAutoCommit(false); + + String columnList = String.join(", ", columns); + String placeholders = String.join(", ", java.util.Collections.nCopies(columns.size(), "?")); + String query = String.format("INSERT INTO %s (%s) VALUES (%s)", tableName, columnList, placeholders); + + java.sql.PreparedStatement preparedStatement = connection.prepareStatement(query); + + for (Object[] row : values) { + for (int i = 0; i < row.length; i++) { + preparedStatement.setObject(i + 1, row[i]); + } + preparedStatement.addBatch(); + } + + int[] results = preparedStatement.executeBatch(); + connection.commit(); + + int totalRows = 0; + for (int result : results) { + totalRows += result; + } + + return totalRows; + } catch (SQLException e) { + log.error("Failed to execute batch insert: {}", e.getMessage()); + throw new RuntimeException("Failed to execute batch insert: " + e.getMessage()); + } + } + + /** + * Closes a connection + * + * @param connectionId Connection ID + */ + public void closeConnection(String connectionId) { + 
ClickHouseDataSource dataSource = connectionPool.remove(connectionId); + if (dataSource != null) { + log.info("Connection {} closed", connectionId); + } + } + + /** + * Generates a unique connection ID + * + * @param connectionRequest Connection details + * @return Connection ID + */ + private String generateConnectionId(ConnectionRequest connectionRequest) { + return String.format("%s_%s_%s", + connectionRequest.getHost(), + connectionRequest.getPort(), + connectionRequest.getDatabase()); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/DataIngestionService.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/DataIngestionService.java new file mode 100644 index 000000000..10b47492e --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/DataIngestionService.java @@ -0,0 +1,161 @@ +package com.ingestion.service; + +import com.ingestion.dto.IngestionRequest; +import com.ingestion.model.FlatFileConfig; +import com.ingestion.model.IngestionStatus; +import com.ingestion.model.TableMapping; +import com.ingestion.repository.TableMappingRepository; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +@Service +@Slf4j +public class DataIngestionService { + + @Autowired + private ClickHouseConnectionService clickHouseService; + + @Autowired + private FlatFileService flatFileService; + + @Autowired + private IngestionStatusService statusService; + + @Autowired + private TableMappingRepository tableMappingRepository; + + private final ExecutorService 
executorService = Executors.newFixedThreadPool(5); + + /** + * Starts the ingestion process + * + * @param file File to ingest + * @param request Ingestion request + * @return Ingestion status ID + */ + public String startIngestion(MultipartFile file, IngestionRequest request) { + try { + // Create ingestion status + String statusId = statusService.createIngestionStatus(request.getTableName(), file.getOriginalFilename()); + + // Start ingestion asynchronously + CompletableFuture.runAsync(() -> { + try { + processIngestion(file, request, statusId); + } catch (Exception e) { + log.error("Ingestion failed: {}", e.getMessage()); + statusService.updateStatus(statusId, "FAILED"); + statusService.updateErrorMessage(statusId, e.getMessage()); + } + }, executorService); + + return statusId; + } catch (Exception e) { + log.error("Failed to start ingestion: {}", e.getMessage()); + throw new RuntimeException("Failed to start ingestion: " + e.getMessage()); + } + } + + /** + * Processes the ingestion + * + * @param file File to ingest + * @param request Ingestion request + * @param statusId Ingestion status ID + */ + private void processIngestion(MultipartFile file, IngestionRequest request, String statusId) { + try { + // Upload file + Path filePath = flatFileService.uploadFile(file, request.getFileConfig()); + + // Get table mapping + TableMapping tableMapping = tableMappingRepository.findByTableName(request.getTableName()) + .orElseThrow(() -> new RuntimeException("Table mapping not found: " + request.getTableName())); + + // Create flat file config + FlatFileConfig flatFileConfig = createFlatFileConfig(request.getFileConfig()); + + // Stream and process the file + long totalRows = flatFileService.streamCSVFile(filePath, flatFileConfig, (headers, batch) -> { + try { + // Transform data according to table mapping + List transformedData = transformData(batch, tableMapping); + + // Insert data into ClickHouse + clickHouseService.executeBatchInsert( + request.getConnectionId(), 
+ request.getTableName(), + tableMapping.getColumnMappings().keySet(), + transformedData + ); + + // Update progress + statusService.updateProgress(statusId, batch.size()); + } catch (Exception e) { + log.error("Failed to process batch: {}", e.getMessage()); + throw new RuntimeException("Failed to process batch: " + e.getMessage()); + } + }); + + // Update total rows and mark as complete + statusService.updateTotalRows(statusId, totalRows); + statusService.updateStatus(statusId, "SUCCESS"); + } catch (Exception e) { + log.error("Ingestion failed: {}", e.getMessage()); + statusService.updateStatus(statusId, "FAILED"); + statusService.updateErrorMessage(statusId, e.getMessage()); + throw new RuntimeException("Ingestion failed: " + e.getMessage()); + } + } + + /** + * Creates a flat file configuration + * + * @param fileConfig File configuration from request + * @return FlatFileConfig + */ + private FlatFileConfig createFlatFileConfig(com.ingestion.dto.FlatFileConfig fileConfig) { + FlatFileConfig config = new FlatFileConfig(); + config.setDelimiter(fileConfig.getDelimiter()); + config.setQuoteChar(fileConfig.getQuoteChar()); + config.setEscapeChar(fileConfig.getEscapeChar()); + config.setLineSeparator(fileConfig.getLineSeparator()); + config.setHasHeader(fileConfig.isHasHeader()); + return config; + } + + /** + * Transforms data according to table mapping + * + * @param data Data to transform + * @param tableMapping Table mapping + * @return Transformed data + */ + private List transformData(List> data, TableMapping tableMapping) { + List transformedData = new ArrayList<>(); + Map columnMappings = tableMapping.getColumnMappings(); + + for (Map row : data) { + Object[] transformedRow = new Object[columnMappings.size()]; + int i = 0; + for (Map.Entry entry : columnMappings.entrySet()) { + String sourceColumn = entry.getValue(); + String value = row.get(sourceColumn); + transformedRow[i++] = value != null ? 
value : ""; + } + transformedData.add(transformedRow); + } + + return transformedData; + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/FlatFileService.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/FlatFileService.java new file mode 100644 index 000000000..05b2276c7 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/FlatFileService.java @@ -0,0 +1,230 @@ +package com.ingestion.service; + +import com.ingestion.dto.FileUploadRequest; +import com.ingestion.model.FlatFileConfig; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +@Service +@Slf4j +public class FlatFileService { + + private static final String UPLOAD_DIR = "uploads"; + private static final int BATCH_SIZE = 1000; + + /** + * Uploads a file to the server + * + * @param file File to upload + * @param request File upload request + * @return Path to the uploaded file + */ + public Path uploadFile(MultipartFile file, FileUploadRequest request) { + try { + // Create upload directory if it doesn't exist + Path uploadPath = Paths.get(UPLOAD_DIR); + if (!Files.exists(uploadPath)) { + Files.createDirectories(uploadPath); + } + + // Generate a unique filename + String originalFilename = file.getOriginalFilename(); + String fileExtension = originalFilename.substring(originalFilename.lastIndexOf(".")); + String uniqueFilename = System.currentTimeMillis() + "_" 
+ originalFilename; + Path filePath = uploadPath.resolve(uniqueFilename); + + // Save the file + Files.copy(file.getInputStream(), filePath); + log.info("File uploaded successfully: {}", filePath); + + return filePath; + } catch (IOException e) { + log.error("Failed to upload file: {}", e.getMessage()); + throw new RuntimeException("Failed to upload file: " + e.getMessage()); + } + } + + /** + * Parses a CSV file and returns the headers and data + * + * @param filePath Path to the CSV file + * @param config CSV configuration + * @return Map containing headers and data + */ + public Map parseCSVFile(Path filePath, FlatFileConfig config) { + Map result = new HashMap<>(); + List headers = new ArrayList<>(); + List> data = new ArrayList<>(); + + try (BufferedReader reader = Files.newBufferedReader(filePath, StandardCharsets.UTF_8)) { + // Create CSV format based on configuration + CSVFormat csvFormat = createCSVFormat(config); + + // Parse the CSV file + CSVParser parser = new CSVParser(reader, csvFormat); + List records = parser.getRecords(); + + if (records.isEmpty()) { + result.put("headers", headers); + result.put("data", data); + return result; + } + + // Get headers + CSVRecord headerRecord = records.get(0); + for (String header : headerRecord) { + headers.add(header.trim()); + } + + // Get data + for (int i = 1; i < records.size(); i++) { + CSVRecord record = records.get(i); + Map row = new HashMap<>(); + + for (int j = 0; j < headers.size(); j++) { + if (j < record.size()) { + row.put(headers.get(j), record.get(j).trim()); + } else { + row.put(headers.get(j), ""); + } + } + + data.add(row); + } + + result.put("headers", headers); + result.put("data", data); + return result; + } catch (IOException e) { + log.error("Failed to parse CSV file: {}", e.getMessage()); + throw new RuntimeException("Failed to parse CSV file: " + e.getMessage()); + } + } + + /** + * Streams a CSV file and processes it in batches + * + * @param filePath Path to the CSV file + * @param 
config CSV configuration + * @param processor Batch processor + * @return Number of rows processed + */ + public long streamCSVFile(Path filePath, FlatFileConfig config, BatchProcessor processor) { + AtomicLong rowCount = new AtomicLong(0); + List> batch = new ArrayList<>(); + List headers = new ArrayList<>(); + + try (BufferedReader reader = Files.newBufferedReader(filePath, StandardCharsets.UTF_8)) { + // Create CSV format based on configuration + CSVFormat csvFormat = createCSVFormat(config); + + // Parse the CSV file + CSVParser parser = new CSVParser(reader, csvFormat); + List records = parser.getRecords(); + + if (records.isEmpty()) { + return 0; + } + + // Get headers + CSVRecord headerRecord = records.get(0); + for (String header : headerRecord) { + headers.add(header.trim()); + } + + // Process data in batches + for (int i = 1; i < records.size(); i++) { + CSVRecord record = records.get(i); + Map row = new HashMap<>(); + + for (int j = 0; j < headers.size(); j++) { + if (j < record.size()) { + row.put(headers.get(j), record.get(j).trim()); + } else { + row.put(headers.get(j), ""); + } + } + + batch.add(row); + rowCount.incrementAndGet(); + + // Process batch if it reaches the batch size + if (batch.size() >= BATCH_SIZE) { + processor.processBatch(batch); + batch.clear(); + } + } + + // Process remaining rows + if (!batch.isEmpty()) { + processor.processBatch(batch); + } + + return rowCount.get(); + } catch (IOException e) { + log.error("Failed to stream CSV file: {}", e.getMessage()); + throw new RuntimeException("Failed to stream CSV file: " + e.getMessage()); + } + } + + /** + * Creates a CSV format based on configuration + * + * @param config CSV configuration + * @return CSVFormat + */ + private CSVFormat createCSVFormat(FlatFileConfig config) { + CSVFormat csvFormat = CSVFormat.DEFAULT; + + // Set delimiter + if (config.getDelimiter() != null && !config.getDelimiter().isEmpty()) { + csvFormat = csvFormat.withDelimiter(config.getDelimiter().charAt(0)); 
+ } + + // Set quote character + if (config.getQuoteChar() != null && !config.getQuoteChar().isEmpty()) { + csvFormat = csvFormat.withQuote(config.getQuoteChar().charAt(0)); + } + + // Set escape character + if (config.getEscapeChar() != null && !config.getEscapeChar().isEmpty()) { + csvFormat = csvFormat.withEscape(config.getEscapeChar().charAt(0)); + } + + // Set line separator + if (config.getLineSeparator() != null && !config.getLineSeparator().isEmpty()) { + csvFormat = csvFormat.withRecordSeparator(config.getLineSeparator()); + } + + // Set header row + if (config.isHasHeader()) { + csvFormat = csvFormat.withFirstRecordAsHeader(); + } else { + csvFormat = csvFormat.withSkipHeaderRecord(); + } + + return csvFormat; + } + + /** + * Interface for batch processing + */ + public interface BatchProcessor { + void processBatch(List> batch); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/IngestionStatusService.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/IngestionStatusService.java new file mode 100644 index 000000000..dc935ee5d --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/IngestionStatusService.java @@ -0,0 +1,118 @@ +package com.ingestion.service; + +import com.ingestion.entity.IngestionStatus; +import com.ingestion.entity.IngestionStatusEnum; +import com.ingestion.repository.IngestionStatusRepository; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import java.util.List; +import java.util.Optional; +import java.util.UUID; + +@Service +@RequiredArgsConstructor +public class IngestionStatusService { + + private final IngestionStatusRepository ingestionStatusRepository; + + @Transactional + public IngestionStatus createIngestionStatus(String tableName, String fileName) { + IngestionStatus status = 
IngestionStatus.builder() + .tableName(tableName) + .fileName(fileName) + .status(IngestionStatusEnum.IN_PROGRESS) + .progress(0) + .retryCount(0) + .build(); + + return ingestionStatusRepository.save(status); + } + + @Transactional + public IngestionStatus updateStatus(Long id, IngestionStatusEnum status) { + IngestionStatus ingestionStatus = ingestionStatusRepository.findById(id) + .orElseThrow(() -> new RuntimeException("Ingestion status not found with id: " + id)); + + ingestionStatus.setStatus(status); + + if (status == IngestionStatusEnum.SUCCESS || status == IngestionStatusEnum.FAILED) { + ingestionStatus.setProgress(100); + } + + return ingestionStatusRepository.save(ingestionStatus); + } + + @Transactional + public IngestionStatus updateProgress(Long id, Integer progress) { + IngestionStatus ingestionStatus = ingestionStatusRepository.findById(id) + .orElseThrow(() -> new RuntimeException("Ingestion status not found with id: " + id)); + + ingestionStatus.setProgress(progress); + + return ingestionStatusRepository.save(ingestionStatus); + } + + @Transactional + public IngestionStatus updateTotalRows(Long id, Long totalRows) { + IngestionStatus ingestionStatus = ingestionStatusRepository.findById(id) + .orElseThrow(() -> new RuntimeException("Ingestion status not found with id: " + id)); + + ingestionStatus.setTotalRows(totalRows); + + return ingestionStatusRepository.save(ingestionStatus); + } + + @Transactional + public IngestionStatus updateProcessedRows(Long id, Long processedRows) { + IngestionStatus ingestionStatus = ingestionStatusRepository.findById(id) + .orElseThrow(() -> new RuntimeException("Ingestion status not found with id: " + id)); + + ingestionStatus.setProcessedRows(processedRows); + + if (ingestionStatus.getTotalRows() != null && ingestionStatus.getTotalRows() > 0) { + int progress = (int) ((processedRows * 100) / ingestionStatus.getTotalRows()); + ingestionStatus.setProgress(progress); + } + + return 
ingestionStatusRepository.save(ingestionStatus); + } + + @Transactional + public IngestionStatus updateErrorMessage(Long id, String errorMessage) { + IngestionStatus ingestionStatus = ingestionStatusRepository.findById(id) + .orElseThrow(() -> new RuntimeException("Ingestion status not found with id: " + id)); + + ingestionStatus.setErrorMessage(errorMessage); + ingestionStatus.setStatus(IngestionStatusEnum.FAILED); + + return ingestionStatusRepository.save(ingestionStatus); + } + + @Transactional + public IngestionStatus incrementRetryCount(Long id) { + IngestionStatus ingestionStatus = ingestionStatusRepository.findById(id) + .orElseThrow(() -> new RuntimeException("Ingestion status not found with id: " + id)); + + ingestionStatus.setRetryCount(ingestionStatus.getRetryCount() + 1); + + return ingestionStatusRepository.save(ingestionStatus); + } + + public List getStatusByTableName(String tableName) { + return ingestionStatusRepository.findByTableName(tableName); + } + + public List getStatusByFileName(String fileName) { + return ingestionStatusRepository.findByFileName(fileName); + } + + public List getIncompleteIngestions() { + return ingestionStatusRepository.findByCompletedAtIsNull(); + } + + public Optional getStatusById(Long id) { + return ingestionStatusRepository.findById(id); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/TableMappingService.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/TableMappingService.java new file mode 100644 index 000000000..683d5d0fb --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/ingestion/service/TableMappingService.java @@ -0,0 +1,63 @@ +package com.ingestion.service; + +import com.ingestion.dto.TableMappingRequest; +import com.ingestion.entity.TableMapping; +import com.ingestion.repository.TableMappingRepository; +import org.springframework.beans.factory.annotation.Autowired; +import 
org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import javax.persistence.EntityNotFoundException; +import java.util.List; + +@Service +public class TableMappingService { + + private final TableMappingRepository tableMappingRepository; + + @Autowired + public TableMappingService(TableMappingRepository tableMappingRepository) { + this.tableMappingRepository = tableMappingRepository; + } + + @Transactional + public TableMapping createTableMapping(TableMappingRequest request) { + if (tableMappingRepository.existsByTableName(request.getTableName())) { + throw new IllegalArgumentException("Table mapping already exists for table: " + request.getTableName()); + } + + TableMapping tableMapping = new TableMapping(); + tableMapping.setTableName(request.getTableName()); + tableMapping.setSchemaDefinition(request.getSchemaDefinition()); + tableMapping.setColumnMappings(request.getColumnMappings()); + + return tableMappingRepository.save(tableMapping); + } + + @Transactional(readOnly = true) + public TableMapping getTableMapping(String tableName) { + return tableMappingRepository.findByTableName(tableName) + .orElseThrow(() -> new EntityNotFoundException("Table mapping not found for table: " + tableName)); + } + + @Transactional(readOnly = true) + public List getAllTableMappings() { + return tableMappingRepository.findAll(); + } + + @Transactional + public TableMapping updateTableMapping(String tableName, TableMappingRequest request) { + TableMapping existingMapping = getTableMapping(tableName); + + existingMapping.setSchemaDefinition(request.getSchemaDefinition()); + existingMapping.setColumnMappings(request.getColumnMappings()); + + return tableMappingRepository.save(existingMapping); + } + + @Transactional + public void deleteTableMapping(String tableName) { + TableMapping tableMapping = getTableMapping(tableName); + tableMappingRepository.delete(tableMapping); + } +} \ No newline at end of file diff --git 
a/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/IngestionApplication.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/IngestionApplication.java new file mode 100644 index 000000000..eb9ad9e15 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/IngestionApplication.java @@ -0,0 +1,13 @@ +package com.wrangler.ingestion; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.scheduling.annotation.EnableAsync; + +@SpringBootApplication +@EnableAsync +public class IngestionApplication { + public static void main(String[] args) { + SpringApplication.run(IngestionApplication.class, args); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/config/ClickHouseConfig.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/config/ClickHouseConfig.java new file mode 100644 index 000000000..2bc400012 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/config/ClickHouseConfig.java @@ -0,0 +1,23 @@ +package com.wrangler.ingestion.config; + +import com.clickhouse.jdbc.ClickHouseDataSource; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import javax.sql.DataSource; +import java.util.Properties; + +@Configuration +public class ClickHouseConfig { + + @Bean + public DataSource clickHouseDataSource() { + Properties properties = new Properties(); + properties.setProperty("user", "${CLICKHOUSE_USER}"); + properties.setProperty("password", "${CLICKHOUSE_PASSWORD}"); + properties.setProperty("ssl", "true"); + properties.setProperty("sslmode", "STRICT"); + + return new ClickHouseDataSource("jdbc:clickhouse://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT}/${CLICKHOUSE_DATABASE}", properties); + } 
+} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/config/JwtConfig.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/config/JwtConfig.java new file mode 100644 index 000000000..8763069a0 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/config/JwtConfig.java @@ -0,0 +1,44 @@ +package com.wrangler.ingestion.config; + +import io.jsonwebtoken.Claims; +import io.jsonwebtoken.Jwts; +import io.jsonwebtoken.SignatureAlgorithm; +import io.jsonwebtoken.security.Keys; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.security.Key; +import java.util.Date; + +@Configuration +public class JwtConfig { + + @Value("${jwt.secret}") + private String secret; + + @Value("${jwt.expiration}") + private Long expiration; + + @Bean + public Key key() { + return Keys.hmacShaKeyFor(secret.getBytes()); + } + + public String generateToken(String username) { + return Jwts.builder() + .setSubject(username) + .setIssuedAt(new Date(System.currentTimeMillis())) + .setExpiration(new Date(System.currentTimeMillis() + expiration)) + .signWith(key(), SignatureAlgorithm.HS256) + .compact(); + } + + public Claims validateToken(String token) { + return Jwts.parserBuilder() + .setSigningKey(key()) + .build() + .parseClaimsJws(token) + .getBody(); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/controller/IngestionController.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/controller/IngestionController.java new file mode 100644 index 000000000..bf4ed0c93 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/controller/IngestionController.java @@ -0,0 +1,119 @@ +package 
com.wrangler.ingestion.controller; + +import com.wrangler.ingestion.model.ConnectionConfig; +import com.wrangler.ingestion.service.ClickHouseService; +import com.wrangler.ingestion.service.FlatFileService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; +import org.springframework.web.multipart.MultipartFile; + +import javax.sql.DataSource; +import javax.validation.Valid; +import java.io.File; +import java.io.IOException; +import java.sql.SQLException; +import java.util.List; +import java.util.concurrent.CompletableFuture; + +@Slf4j +@RestController +@RequestMapping("/api/ingestion") +@RequiredArgsConstructor +public class IngestionController { + + private final ClickHouseService clickHouseService; + private final FlatFileService flatFileService; + + @PostMapping("/connect") + public ResponseEntity connect(@Valid @RequestBody ConnectionConfig config) { + try { + DataSource dataSource = clickHouseService.createDataSource(config); + List tables = clickHouseService.getTables(dataSource); + return ResponseEntity.ok(tables); + } catch (SQLException e) { + log.error("Failed to connect to ClickHouse", e); + return ResponseEntity.badRequest().body("Failed to connect: " + e.getMessage()); + } + } + + @GetMapping("/tables/{tableName}/columns") + public ResponseEntity getColumns(@PathVariable String tableName, @Valid @RequestBody ConnectionConfig config) { + try { + DataSource dataSource = clickHouseService.createDataSource(config); + List columns = clickHouseService.getColumns(dataSource, tableName); + return ResponseEntity.ok(columns); + } catch (SQLException e) { + log.error("Failed to get columns", e); + return ResponseEntity.badRequest().body("Failed to get columns: " + e.getMessage()); + } + } + + @PostMapping("/file/columns") + public ResponseEntity getFileColumns( + @RequestParam("file") MultipartFile file, + @RequestParam("delimiter") String 
delimiter) { + try { + List columns = flatFileService.getColumns(file, delimiter); + return ResponseEntity.ok(columns); + } catch (IOException e) { + log.error("Failed to read file columns", e); + return ResponseEntity.badRequest().body("Failed to read file: " + e.getMessage()); + } + } + + @PostMapping("/clickhouse-to-file") + public ResponseEntity clickHouseToFile( + @Valid @RequestBody ConnectionConfig config, + @RequestParam String tableName, + @RequestParam List columns, + @RequestParam String filePath, + @RequestParam String delimiter) { + try { + DataSource dataSource = clickHouseService.createDataSource(config); + + CompletableFuture future = clickHouseService.exportToFile( + dataSource, tableName, columns, filePath, delimiter, + count -> log.info("Exported {} rows", count) + ); + + Long totalRows = future.get(); + return ResponseEntity.ok().body("Export completed successfully. Total rows: " + totalRows); + } catch (Exception e) { + log.error("Failed to export from ClickHouse to file", e); + return ResponseEntity.badRequest().body("Export failed: " + e.getMessage()); + } + } + + @PostMapping("/file-to-clickhouse") + public ResponseEntity fileToClickHouse( + @RequestParam("file") MultipartFile file, + @RequestParam String delimiter, + @Valid @RequestBody ConnectionConfig config, + @RequestParam String tableName, + @RequestParam List columns) { + try { + // Save uploaded file temporarily + String tempFilePath = System.getProperty("java.io.tmpdir") + "/" + file.getOriginalFilename(); + file.transferTo(new File(tempFilePath)); + + DataSource dataSource = clickHouseService.createDataSource(config); + + CompletableFuture future = clickHouseService.importFromFile( + dataSource, tableName, columns, tempFilePath, delimiter, + count -> log.info("Imported {} rows", count) + ); + + Long totalRows = future.get(); + + // Clean up temp file + new File(tempFilePath).delete(); + + return ResponseEntity.ok().body("Import completed successfully. 
Total rows: " + totalRows); + } catch (Exception e) { + log.error("Failed to import file to ClickHouse", e); + return ResponseEntity.badRequest().body("Import failed: " + e.getMessage()); + } + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/exception/GlobalExceptionHandler.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/exception/GlobalExceptionHandler.java new file mode 100644 index 000000000..7aa367d0e --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/exception/GlobalExceptionHandler.java @@ -0,0 +1,55 @@ +package com.wrangler.ingestion.exception; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.ControllerAdvice; +import org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.multipart.MaxUploadSizeExceededException; + +import java.io.IOException; +import java.sql.SQLException; + +@Slf4j +@ControllerAdvice +public class GlobalExceptionHandler { + + @ExceptionHandler(SQLException.class) + public ResponseEntity handleSQLException(SQLException e) { + log.error("Database error occurred", e); + return ResponseEntity.badRequest() + .body(new ErrorResponse("Database error: " + e.getMessage())); + } + + @ExceptionHandler(MaxUploadSizeExceededException.class) + public ResponseEntity handleMaxUploadSizeExceededException(MaxUploadSizeExceededException e) { + log.error("File size exceeded limit", e); + return ResponseEntity.badRequest() + .body(new ErrorResponse("File size exceeds the maximum limit")); + } + + @ExceptionHandler(IOException.class) + public ResponseEntity handleIOException(IOException e) { + log.error("File operation error occurred", e); + return ResponseEntity.badRequest() + .body(new ErrorResponse("File operation error: " + e.getMessage())); + } + + @ExceptionHandler(Exception.class) + 
public ResponseEntity<ErrorResponse> handleGenericException(Exception e) { + log.error("Unexpected error occurred", e); + return ResponseEntity.internalServerError() + .body(new ErrorResponse("An unexpected error occurred: " + e.getMessage())); + } + + private static class ErrorResponse { + private final String message; + + public ErrorResponse(String message) { + this.message = message; + } + + public String getMessage() { + return message; + } + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/model/ConnectionConfig.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/model/ConnectionConfig.java new file mode 100644 index 000000000..8aeb080c3 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/model/ConnectionConfig.java @@ -0,0 +1,27 @@ +package com.wrangler.ingestion.model; + +import lombok.Data; + +import javax.validation.constraints.NotBlank; +import javax.validation.constraints.NotNull; + +@Data +public class ConnectionConfig { + @NotBlank + private String host; + + @NotNull + private Integer port; + + @NotBlank + private String database; + + @NotBlank + private String user; + + @NotBlank + private String jwtToken; + + private String sslMode = "STRICT"; + private boolean useSsl = true; +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/service/ClickHouseService.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/service/ClickHouseService.java new file mode 100644 index 000000000..6a79fc895 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/service/ClickHouseService.java @@ -0,0 +1,166 @@ +package com.wrangler.ingestion.service; + +import com.clickhouse.jdbc.ClickHouseDataSource; +import com.wrangler.ingestion.model.ConnectionConfig; +import lombok.extern.slf4j.Slf4j; +import 
org.springframework.stereotype.Service; + +import javax.sql.DataSource; +import java.sql.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicLong; + +@Slf4j +@Service +public class ClickHouseService { + + public DataSource createDataSource(ConnectionConfig config) { + Properties properties = new Properties(); + properties.setProperty("user", config.getUser()); + properties.setProperty("password", config.getJwtToken()); + properties.setProperty("ssl", String.valueOf(config.isUseSsl())); + properties.setProperty("sslmode", config.getSslMode()); + + String url = String.format("jdbc:clickhouse://%s:%d/%s", + config.getHost(), config.getPort(), config.getDatabase()); + + return new ClickHouseDataSource(url, properties); + } + + public List<String> getTables(DataSource dataSource) throws SQLException { + List<String> tables = new ArrayList<>(); + try (Connection conn = dataSource.getConnection()) { + DatabaseMetaData metaData = conn.getMetaData(); + ResultSet rs = metaData.getTables(null, null, "%", new String[]{"TABLE"}); + + while (rs.next()) { + tables.add(rs.getString("TABLE_NAME")); + } + } + return tables; + } + + public List<String> getColumns(DataSource dataSource, String tableName) throws SQLException { + List<String> columns = new ArrayList<>(); + try (Connection conn = dataSource.getConnection()) { + DatabaseMetaData metaData = conn.getMetaData(); + ResultSet rs = metaData.getColumns(null, null, tableName, "%"); + + while (rs.next()) { + columns.add(rs.getString("COLUMN_NAME")); + } + } + return columns; + } + + public long getRowCount(DataSource dataSource, String tableName) throws SQLException { + try (Connection conn = dataSource.getConnection()) { + try (var stmt = conn.createStatement()) { + ResultSet rs = stmt.executeQuery("SELECT count() FROM " + tableName); + if (rs.next()) { + return rs.getLong(1); + } + } + } + return 0; + } + + public CompletableFuture<Long> 
exportToFile(DataSource dataSource, String tableName, + List columns, String filePath, String delimiter, + ProgressCallback progressCallback) { + return CompletableFuture.supplyAsync(() -> { + AtomicLong rowCount = new AtomicLong(0); + try (Connection conn = dataSource.getConnection()) { + String columnList = String.join(", ", columns); + String query = String.format("SELECT %s FROM %s", columnList, tableName); + + try (Statement stmt = conn.createStatement()) { + stmt.setFetchSize(10000); // Set batch size + ResultSet rs = stmt.executeQuery(query); + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath))) { + // Write header + writer.write(String.join(delimiter, columns)); + writer.newLine(); + + // Write data + while (rs.next()) { + List row = new ArrayList<>(); + for (String column : columns) { + row.add(rs.getString(column)); + } + writer.write(String.join(delimiter, row)); + writer.newLine(); + + long count = rowCount.incrementAndGet(); + if (count % 10000 == 0) { + progressCallback.onProgress(count); + } + } + } + } + } catch (Exception e) { + log.error("Failed to export data to file", e); + throw new RuntimeException("Export failed: " + e.getMessage()); + } + return rowCount.get(); + }); + } + + public CompletableFuture importFromFile(DataSource dataSource, String tableName, + List columns, String filePath, String delimiter, + ProgressCallback progressCallback) { + return CompletableFuture.supplyAsync(() -> { + AtomicLong rowCount = new AtomicLong(0); + try (Connection conn = dataSource.getConnection()) { + String columnList = String.join(", ", columns); + String placeholders = String.join(", ", columns.stream() + .map(c -> "?").toList()); + + String insertSql = String.format("INSERT INTO %s (%s) VALUES (%s)", + tableName, columnList, placeholders); + + try (PreparedStatement pstmt = conn.prepareStatement(insertSql)) { + conn.setAutoCommit(false); + + try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) { + // Skip 
header + reader.readLine(); + + String line; + while ((line = reader.readLine()) != null) { + String[] values = line.split(delimiter); + for (int i = 0; i < values.length; i++) { + pstmt.setString(i + 1, values[i].trim()); + } + pstmt.addBatch(); + + long count = rowCount.incrementAndGet(); + if (count % 10000 == 0) { + pstmt.executeBatch(); + conn.commit(); + progressCallback.onProgress(count); + } + } + + // Execute remaining batch + pstmt.executeBatch(); + conn.commit(); + } + } + } catch (Exception e) { + log.error("Failed to import data from file", e); + throw new RuntimeException("Import failed: " + e.getMessage()); + } + return rowCount.get(); + }); + } + + @FunctionalInterface + public interface ProgressCallback { + void onProgress(long count); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/service/FlatFileService.java b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/service/FlatFileService.java new file mode 100644 index 000000000..c33507d0a --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/java/com/wrangler/ingestion/service/FlatFileService.java @@ -0,0 +1,63 @@ +package com.wrangler.ingestion.service; + +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVRecord; +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +import java.io.*; +import java.util.ArrayList; +import java.util.List; + +@Slf4j +@Service +public class FlatFileService { + + public List getColumns(MultipartFile file, String delimiter) throws IOException { + List columns = new ArrayList<>(); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(file.getInputStream()))) { + String headerLine = reader.readLine(); + if (headerLine != null) { + String[] headers = headerLine.split(delimiter); + for (String header : headers) { + 
columns.add(header.trim()); + } + } + } + return columns; + } + + public long getRowCount(MultipartFile file, String delimiter) throws IOException { + long count = 0; + try (BufferedReader reader = new BufferedReader(new InputStreamReader(file.getInputStream()))) { + // Skip header + reader.readLine(); + + while (reader.readLine() != null) { + count++; + } + } + return count; + } + + public void writeToFile(List data, String filePath, String delimiter) throws IOException { + try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath))) { + for (String line : data) { + writer.write(line); + writer.newLine(); + } + } + } + + public List readFromFile(String filePath, String delimiter) throws IOException { + List data = new ArrayList<>(); + try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) { + String line; + while ((line = reader.readLine()) != null) { + data.add(line); + } + } + return data; + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/resources/application.properties b/clickhouse-flatfile-ingestion/backend/src/main/resources/application.properties new file mode 100644 index 000000000..d4adac2fc --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/resources/application.properties @@ -0,0 +1,43 @@ +# Server Configuration +server.port=8080 +server.servlet.context-path=/api + +# Database Configuration +spring.datasource.url=${DB_URL:jdbc:postgresql://localhost:5432/ingestion_db} +spring.datasource.username=${DB_USERNAME:postgres} +spring.datasource.password=${DB_PASSWORD} +spring.jpa.hibernate.ddl-auto=update +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.PostgreSQLDialect + +# ClickHouse Configuration +clickhouse.host=${CLICKHOUSE_HOST:localhost} +clickhouse.port=${CLICKHOUSE_PORT:8443} +clickhouse.database=${CLICKHOUSE_DATABASE:default} +clickhouse.user=${CLICKHOUSE_USER:default} +clickhouse.password=${CLICKHOUSE_PASSWORD} +clickhouse.ssl=true 
+clickhouse.sslmode=STRICT + +# File Upload Configuration +spring.servlet.multipart.max-file-size=100MB +spring.servlet.multipart.max-request-size=100MB +file.upload-dir=${UPLOAD_DIR:./uploads} + +# Logging Configuration +logging.level.root=INFO +logging.level.com.ingestion=DEBUG +logging.level.com.wrangler.ingestion=DEBUG +logging.pattern.console=%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n + +# Async Configuration +spring.task.execution.pool.core-size=5 +spring.task.execution.pool.max-size=10 +spring.task.execution.pool.queue-capacity=25 + +# H2 Console Configuration +spring.h2.console.enabled=true +spring.h2.console.path=/h2-console + +# JWT Configuration +jwt.secret=${JWT_SECRET} +jwt.expiration=86400000 \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/main/resources/logback-spring.xml b/clickhouse-flatfile-ingestion/backend/src/main/resources/logback-spring.xml new file mode 100644 index 000000000..2dca01e17 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/main/resources/logback-spring.xml @@ -0,0 +1,39 @@ + + + + + + + + %black(%d{ISO8601}) %highlight(%-5level) [%blue(%t)] %yellow(%C{1}): %msg%n%throwable + + + + + + ${LOGS}/spring-boot-logger.log + + %d %p %C{1} [%t] %m%n + + + + ${LOGS}/archived/spring-boot-logger-%d{yyyy-MM-dd}.%i.log + + 10MB + + 30 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/controller/IngestionStatusControllerTest.java b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/controller/IngestionStatusControllerTest.java new file mode 100644 index 000000000..76f3b174a --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/controller/IngestionStatusControllerTest.java @@ -0,0 +1,181 @@ +package com.ingestion.controller; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.ingestion.dto.IngestionRequest; +import 
com.ingestion.entity.IngestionStatus; +import com.ingestion.service.DataIngestionService; +import com.ingestion.service.IngestionStatusService; +import com.ingestion.service.TestUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.http.MediaType; +import org.springframework.mock.web.MockMultipartFile; +import org.springframework.test.web.servlet.MockMvc; + +import java.time.LocalDateTime; +import java.util.Arrays; +import java.util.List; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.*; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +@WebMvcTest(IngestionStatusController.class) +public class IngestionStatusControllerTest { + + @Autowired + private MockMvc mockMvc; + + @Autowired + private ObjectMapper objectMapper; + + @MockBean + private DataIngestionService dataIngestionService; + + @MockBean + private IngestionStatusService ingestionStatusService; + + private IngestionRequest testRequest; + private IngestionStatus testStatus; + private MockMultipartFile testFile; + + @BeforeEach + void setUp() throws Exception { + testRequest = TestUtils.createTestIngestionRequest(); + testFile = new MockMultipartFile( + "file", + "test.csv", + "text/csv", + "id,name,age\n1,John,30".getBytes() + ); + + testStatus = new IngestionStatus(); + testStatus.setId("test-status-id"); + testStatus.setStatus(IngestionStatus.Status.IN_PROGRESS); + testStatus.setMessage("Processing"); + testStatus.setStartTime(LocalDateTime.now()); + testStatus.setRowsProcessed(0L); + } + + @Test + void testStartIngestion_Success() throws Exception { + // Arrange + 
when(dataIngestionService.startIngestion(any(), any(IngestionRequest.class))) + .thenReturn(testStatus.getId()); + + // Act & Assert + mockMvc.perform(multipart("/api/ingestion/start") + .file(testFile) + .param("tableName", testRequest.getTableName()) + .param("connectionId", testRequest.getConnectionId())) + .andExpect(status().isAccepted()) + .andExpect(jsonPath("$.statusId").value(testStatus.getId())); + + verify(dataIngestionService).startIngestion(any(), any(IngestionRequest.class)); + } + + @Test + void testGetStatus_Success() throws Exception { + // Arrange + when(ingestionStatusService.getStatus(testStatus.getId())).thenReturn(testStatus); + + // Act & Assert + mockMvc.perform(get("/api/ingestion/status/{statusId}", testStatus.getId())) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.id").value(testStatus.getId())) + .andExpect(jsonPath("$.status").value(testStatus.getStatus().toString())) + .andExpect(jsonPath("$.message").value(testStatus.getMessage())); + + verify(ingestionStatusService).getStatus(testStatus.getId()); + } + + @Test + void testGetStatus_NotFound() throws Exception { + // Arrange + String statusId = "non-existent-id"; + when(ingestionStatusService.getStatus(statusId)) + .thenThrow(new RuntimeException("Status not found")); + + // Act & Assert + mockMvc.perform(get("/api/ingestion/status/{statusId}", statusId)) + .andExpect(status().isNotFound()); + + verify(ingestionStatusService).getStatus(statusId); + } + + @Test + void testGetAllStatuses_Success() throws Exception { + // Arrange + IngestionStatus status2 = new IngestionStatus(); + status2.setId("test-status-id-2"); + status2.setStatus(IngestionStatus.Status.COMPLETED); + status2.setMessage("Completed"); + status2.setStartTime(LocalDateTime.now()); + status2.setEndTime(LocalDateTime.now()); + status2.setRowsProcessed(100L); + + List statuses = Arrays.asList(testStatus, status2); + when(ingestionStatusService.getAllStatuses()).thenReturn(statuses); + + // Act & Assert + 
mockMvc.perform(get("/api/ingestion/status")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$[0].id").value(testStatus.getId())) + .andExpect(jsonPath("$[1].id").value(status2.getId())) + .andExpect(jsonPath("$[0].status").value(testStatus.getStatus().toString())) + .andExpect(jsonPath("$[1].status").value(status2.getStatus().toString())); + + verify(ingestionStatusService).getAllStatuses(); + } + + @Test + void testDeleteStatus_Success() throws Exception { + // Arrange + doNothing().when(ingestionStatusService).deleteStatus(testStatus.getId()); + + // Act & Assert + mockMvc.perform(delete("/api/ingestion/status/{statusId}", testStatus.getId())) + .andExpect(status().isNoContent()); + + verify(ingestionStatusService).deleteStatus(testStatus.getId()); + } + + @Test + void testDeleteStatus_NotFound() throws Exception { + // Arrange + String statusId = "non-existent-id"; + doThrow(new RuntimeException("Status not found")) + .when(ingestionStatusService).deleteStatus(statusId); + + // Act & Assert + mockMvc.perform(delete("/api/ingestion/status/{statusId}", statusId)) + .andExpect(status().isNotFound()); + + verify(ingestionStatusService).deleteStatus(statusId); + } + + @Test + void testStartIngestion_InvalidRequest() throws Exception { + // Act & Assert + mockMvc.perform(multipart("/api/ingestion/start") + .file(testFile)) + .andExpect(status().isBadRequest()); + + verify(dataIngestionService, never()).startIngestion(any(), any(IngestionRequest.class)); + } + + @Test + void testStartIngestion_NoFile() throws Exception { + // Act & Assert + mockMvc.perform(multipart("/api/ingestion/start") + .param("tableName", testRequest.getTableName()) + .param("connectionId", testRequest.getConnectionId())) + .andExpect(status().isBadRequest()); + + verify(dataIngestionService, never()).startIngestion(any(), any(IngestionRequest.class)); + } +} \ No newline at end of file diff --git 
a/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/controller/TableMappingControllerTest.java b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/controller/TableMappingControllerTest.java new file mode 100644 index 000000000..41bb66d70 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/controller/TableMappingControllerTest.java @@ -0,0 +1,150 @@ +package com.ingestion.controller; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.ingestion.dto.TableMappingRequest; +import com.ingestion.entity.TableMapping; +import com.ingestion.service.TableMappingService; +import com.ingestion.service.TestUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.*; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +@WebMvcTest(TableMappingController.class) +public class TableMappingControllerTest { + + @Autowired + private MockMvc mockMvc; + + @Autowired + private ObjectMapper objectMapper; + + @MockBean + private TableMappingService tableMappingService; + + private TableMappingRequest testRequest; + private TableMapping testTableMapping; + + @BeforeEach + void setUp() { + testRequest = TestUtils.createTestTableMappingRequest(); + testTableMapping = TestUtils.createTestTableMapping(); + } + + @Test + void testCreateTableMapping_Success() throws Exception { + // Arrange + 
when(tableMappingService.createTableMapping(any(TableMappingRequest.class))) + .thenReturn(testTableMapping); + + // Act & Assert + mockMvc.perform(post("/api/table-mappings") + .contentType(MediaType.APPLICATION_JSON) + .content(objectMapper.writeValueAsString(testRequest))) + .andExpect(status().isCreated()) + .andExpect(jsonPath("$.tableName").value(testTableMapping.getTableName())) + .andExpect(jsonPath("$.schemaDefinition").exists()) + .andExpect(jsonPath("$.columnMappings").exists()); + + verify(tableMappingService).createTableMapping(any(TableMappingRequest.class)); + } + + @Test + void testGetTableMapping_Success() throws Exception { + // Arrange + when(tableMappingService.getTableMapping(testTableMapping.getTableName())) + .thenReturn(testTableMapping); + + // Act & Assert + mockMvc.perform(get("/api/table-mappings/{tableName}", testTableMapping.getTableName())) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.tableName").value(testTableMapping.getTableName())) + .andExpect(jsonPath("$.schemaDefinition").exists()) + .andExpect(jsonPath("$.columnMappings").exists()); + + verify(tableMappingService).getTableMapping(testTableMapping.getTableName()); + } + + @Test + void testGetTableMapping_NotFound() throws Exception { + // Arrange + String tableName = "non_existent_table"; + when(tableMappingService.getTableMapping(tableName)) + .thenThrow(new RuntimeException("Table mapping not found")); + + // Act & Assert + mockMvc.perform(get("/api/table-mappings/{tableName}", tableName)) + .andExpect(status().isNotFound()); + + verify(tableMappingService).getTableMapping(tableName); + } + + @Test + void testUpdateTableMapping_Success() throws Exception { + // Arrange + when(tableMappingService.updateTableMapping(eq(testTableMapping.getTableName()), any(TableMappingRequest.class))) + .thenReturn(testTableMapping); + + // Act & Assert + mockMvc.perform(put("/api/table-mappings/{tableName}", testTableMapping.getTableName()) + .contentType(MediaType.APPLICATION_JSON) 
+ .content(objectMapper.writeValueAsString(testRequest))) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.tableName").value(testTableMapping.getTableName())) + .andExpect(jsonPath("$.schemaDefinition").exists()) + .andExpect(jsonPath("$.columnMappings").exists()); + + verify(tableMappingService).updateTableMapping(eq(testTableMapping.getTableName()), any(TableMappingRequest.class)); + } + + @Test + void testUpdateTableMapping_NotFound() throws Exception { + // Arrange + String tableName = "non_existent_table"; + when(tableMappingService.updateTableMapping(eq(tableName), any(TableMappingRequest.class))) + .thenThrow(new RuntimeException("Table mapping not found")); + + // Act & Assert + mockMvc.perform(put("/api/table-mappings/{tableName}", tableName) + .contentType(MediaType.APPLICATION_JSON) + .content(objectMapper.writeValueAsString(testRequest))) + .andExpect(status().isNotFound()); + + verify(tableMappingService).updateTableMapping(eq(tableName), any(TableMappingRequest.class)); + } + + @Test + void testDeleteTableMapping_Success() throws Exception { + // Arrange + doNothing().when(tableMappingService).deleteTableMapping(testTableMapping.getTableName()); + + // Act & Assert + mockMvc.perform(delete("/api/table-mappings/{tableName}", testTableMapping.getTableName())) + .andExpect(status().isNoContent()); + + verify(tableMappingService).deleteTableMapping(testTableMapping.getTableName()); + } + + @Test + void testDeleteTableMapping_NotFound() throws Exception { + // Arrange + String tableName = "non_existent_table"; + doThrow(new RuntimeException("Table mapping not found")) + .when(tableMappingService).deleteTableMapping(tableName); + + // Act & Assert + mockMvc.perform(delete("/api/table-mappings/{tableName}", tableName)) + .andExpect(status().isNotFound()); + + verify(tableMappingService).deleteTableMapping(tableName); + } +} \ No newline at end of file diff --git 
a/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/ClickHouseConnectionServiceTest.java b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/ClickHouseConnectionServiceTest.java new file mode 100644 index 000000000..88aaa01f5 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/ClickHouseConnectionServiceTest.java @@ -0,0 +1,186 @@ +package com.ingestion.service; + +import com.ingestion.dto.ConnectionRequest; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.test.util.ReflectionTestUtils; +import ru.yandex.clickhouse.ClickHouseDataSource; + +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +public class ClickHouseConnectionServiceTest { + + @InjectMocks + private ClickHouseConnectionService clickHouseConnectionService; + + @Mock + private ClickHouseDataSource mockDataSource; + + @Mock + private Connection mockConnection; + + @Mock + private DatabaseMetaData mockMetaData; + + @Mock + private ResultSet mockResultSet; + + @BeforeEach + public void setUp() throws SQLException { + MockitoAnnotations.openMocks(this); + when(mockDataSource.getConnection()).thenReturn(mockConnection); + when(mockConnection.getMetaData()).thenReturn(mockMetaData); + when(mockMetaData.getTables(any(), any(), any(), any())).thenReturn(mockResultSet); + when(mockMetaData.getColumns(any(), any(), any(), any())).thenReturn(mockResultSet); + } + + @Test + public 
void testCreateConnection_Success() throws SQLException { + // Arrange + ConnectionRequest request = TestUtils.createTestConnectionRequest(); + + // Act + String connectionId = clickHouseConnectionService.createConnection(request); + + // Assert + assertNotNull(connectionId); + assertTrue(connectionId.contains(request.getHost())); + assertTrue(connectionId.contains(request.getDatabase())); + } + + @Test + public void testCreateConnection_DuplicateConnection() throws SQLException { + // Arrange + ConnectionRequest request = TestUtils.createTestConnectionRequest(); + + // Act + String connectionId1 = clickHouseConnectionService.createConnection(request); + String connectionId2 = clickHouseConnectionService.createConnection(request); + + // Assert + assertEquals(connectionId1, connectionId2); + } + + @Test + public void testTestConnection_Success() { + // Arrange + ConnectionRequest request = TestUtils.createTestConnectionRequest(); + + // Act + boolean result = clickHouseConnectionService.testConnection(request); + + // Assert + assertTrue(result); + } + + @Test + public void testTestConnection_Failure() throws SQLException { + // Arrange + ConnectionRequest request = TestUtils.createTestConnectionRequest(); + when(mockDataSource.getConnection()).thenThrow(new SQLException("Connection failed")); + + // Act + boolean result = clickHouseConnectionService.testConnection(request); + + // Assert + assertFalse(result); + } + + @Test + public void testGetTables_Success() throws SQLException { + // Arrange + String connectionId = "test_connection"; + ReflectionTestUtils.setField(clickHouseConnectionService, "connectionPool", Map.of(connectionId, mockDataSource)); + + when(mockResultSet.next()).thenReturn(true, true, false); + when(mockResultSet.getString("TABLE_NAME")).thenReturn("table1", "table2"); + + // Act + List tables = clickHouseConnectionService.getTables(connectionId); + + // Assert + assertEquals(2, tables.size()); + assertTrue(tables.contains("table1")); + 
assertTrue(tables.contains("table2")); + } + + @Test + public void testGetTableSchema_Success() throws SQLException { + // Arrange + String connectionId = "test_connection"; + String tableName = "test_table"; + ReflectionTestUtils.setField(clickHouseConnectionService, "connectionPool", Map.of(connectionId, mockDataSource)); + + when(mockResultSet.next()).thenReturn(true, true, false); + when(mockResultSet.getString("COLUMN_NAME")).thenReturn("id", "name"); + when(mockResultSet.getString("TYPE_NAME")).thenReturn("UInt32", "String"); + + // Act + Map schema = clickHouseConnectionService.getTableSchema(connectionId, tableName); + + // Assert + assertEquals(2, schema.size()); + assertEquals("UInt32", schema.get("id")); + assertEquals("String", schema.get("name")); + } + + @Test + public void testExecuteQuery_Success() throws SQLException { + // Arrange + String connectionId = "test_connection"; + String query = "SELECT * FROM test_table"; + ReflectionTestUtils.setField(clickHouseConnectionService, "connectionPool", Map.of(connectionId, mockDataSource)); + + when(mockConnection.createStatement()).thenReturn(mock(java.sql.Statement.class)); + when(mockConnection.createStatement().executeQuery(query)).thenReturn(mockResultSet); + + // Act + ResultSet resultSet = clickHouseConnectionService.executeQuery(connectionId, query); + + // Assert + assertNotNull(resultSet); + verify(mockConnection.createStatement()).executeQuery(query); + } + + @Test + public void testExecuteBatchInsert_Success() throws SQLException { + // Arrange + String connectionId = "test_connection"; + String tableName = "test_table"; + List columns = List.of("id", "name", "age"); + List values = List.of( + new Object[]{1, "John", 30}, + new Object[]{2, "Jane", 25} + ); + + ReflectionTestUtils.setField(clickHouseConnectionService, "connectionPool", Map.of(connectionId, mockDataSource)); + + java.sql.PreparedStatement mockPreparedStatement = mock(java.sql.PreparedStatement.class); + 
when(mockConnection.prepareStatement(anyString())).thenReturn(mockPreparedStatement); + when(mockPreparedStatement.executeBatch()).thenReturn(new int[]{1, 1}); + + // Act + int result = clickHouseConnectionService.executeBatchInsert(connectionId, tableName, columns, values); + + // Assert + assertEquals(2, result); + verify(mockPreparedStatement, times(2)).addBatch(); + verify(mockPreparedStatement).executeBatch(); + verify(mockConnection).commit(); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/DataIngestionServiceTest.java b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/DataIngestionServiceTest.java new file mode 100644 index 000000000..f5ba5780e --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/DataIngestionServiceTest.java @@ -0,0 +1,190 @@ +package com.ingestion.service; + +import com.ingestion.dto.IngestionRequest; +import com.ingestion.entity.IngestionStatus; +import com.ingestion.entity.TableMapping; +import com.ingestion.model.FlatFileConfig; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.web.multipart.MultipartFile; + +import java.io.IOException; +import java.nio.file.Path; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +public class DataIngestionServiceTest { + + @InjectMocks + private DataIngestionService dataIngestionService; + + @Mock + private 
FlatFileService flatFileService; + + @Mock + private ClickHouseConnectionService clickHouseConnectionService; + + @Mock + private TableMappingService tableMappingService; + + @Mock + private IngestionStatusService ingestionStatusService; + + private MultipartFile testFile; + private IngestionRequest testRequest; + private TableMapping testTableMapping; + private Path testFilePath; + + @BeforeEach + void setUp() throws IOException { + testFile = TestUtils.createTestCSVFile(); + testRequest = TestUtils.createTestIngestionRequest(); + testTableMapping = TestUtils.createTestTableMapping(); + testFilePath = Path.of("test.csv"); + } + + @Test + void testStartIngestion_Success() throws IOException { + // Arrange + when(flatFileService.uploadFile(any(), any())).thenReturn(testFilePath); + when(ingestionStatusService.createStatus(anyString())).thenReturn("test-status-id"); + + // Act + String statusId = dataIngestionService.startIngestion(testFile, testRequest); + + // Assert + assertNotNull(statusId); + assertEquals("test-status-id", statusId); + verify(flatFileService).uploadFile(any(), any()); + verify(ingestionStatusService).createStatus(anyString()); + } + + @Test + void testProcessIngestion_Success() throws IOException, SQLException { + // Arrange + Map parseResult = new HashMap<>(); + parseResult.put("headers", Arrays.asList("id", "name", "age")); + List> data = Arrays.asList( + Map.of("id", "1", "name", "John", "age", "30"), + Map.of("id", "2", "name", "Jane", "age", "25") + ); + parseResult.put("data", data); + + when(tableMappingService.getTableMapping(anyString())).thenReturn(testTableMapping); + when(flatFileService.parseCSVFile(any(), any())).thenReturn(parseResult); + when(clickHouseConnectionService.executeBatchInsert(anyString(), anyString(), any(), any())).thenReturn(2); + + // Act + boolean result = dataIngestionService.processIngestion(testFilePath, testRequest, "test-status-id"); + + // Assert + assertTrue(result); + 
verify(tableMappingService).getTableMapping(testRequest.getTableName()); + verify(flatFileService).parseCSVFile(any(), any()); + verify(clickHouseConnectionService).executeBatchInsert(anyString(), anyString(), any(), any()); + verify(ingestionStatusService).updateStatus(anyString(), any(IngestionStatus.Status.class), anyString(), anyLong()); + } + + @Test + void testProcessIngestion_EmptyFile() throws IOException { + // Arrange + Map parseResult = new HashMap<>(); + parseResult.put("headers", Arrays.asList("id", "name", "age")); + parseResult.put("data", List.of()); + + when(tableMappingService.getTableMapping(anyString())).thenReturn(testTableMapping); + when(flatFileService.parseCSVFile(any(), any())).thenReturn(parseResult); + + // Act + boolean result = dataIngestionService.processIngestion(testFilePath, testRequest, "test-status-id"); + + // Assert + assertFalse(result); + verify(tableMappingService).getTableMapping(testRequest.getTableName()); + verify(flatFileService).parseCSVFile(any(), any()); + verify(ingestionStatusService).updateStatus(anyString(), eq(IngestionStatus.Status.FAILED), anyString(), eq(0L)); + } + + @Test + void testProcessIngestion_InvalidData() throws IOException { + // Arrange + Map parseResult = new HashMap<>(); + parseResult.put("headers", Arrays.asList("invalid_column")); + parseResult.put("data", List.of(Map.of("invalid_column", "value"))); + + when(tableMappingService.getTableMapping(anyString())).thenReturn(testTableMapping); + when(flatFileService.parseCSVFile(any(), any())).thenReturn(parseResult); + + // Act + boolean result = dataIngestionService.processIngestion(testFilePath, testRequest, "test-status-id"); + + // Assert + assertFalse(result); + verify(tableMappingService).getTableMapping(testRequest.getTableName()); + verify(flatFileService).parseCSVFile(any(), any()); + verify(ingestionStatusService).updateStatus(anyString(), eq(IngestionStatus.Status.FAILED), anyString(), eq(0L)); + } + + @Test + void 
testProcessIngestion_DatabaseError() throws IOException, SQLException { + // Arrange + Map parseResult = new HashMap<>(); + parseResult.put("headers", Arrays.asList("id", "name", "age")); + List> data = Arrays.asList( + Map.of("id", "1", "name", "John", "age", "30") + ); + parseResult.put("data", data); + + when(tableMappingService.getTableMapping(anyString())).thenReturn(testTableMapping); + when(flatFileService.parseCSVFile(any(), any())).thenReturn(parseResult); + when(clickHouseConnectionService.executeBatchInsert(anyString(), anyString(), any(), any())) + .thenThrow(new SQLException("Database error")); + + // Act + boolean result = dataIngestionService.processIngestion(testFilePath, testRequest, "test-status-id"); + + // Assert + assertFalse(result); + verify(tableMappingService).getTableMapping(testRequest.getTableName()); + verify(flatFileService).parseCSVFile(any(), any()); + verify(ingestionStatusService).updateStatus(anyString(), eq(IngestionStatus.Status.FAILED), anyString(), eq(0L)); + } + + @Test + void testCreateFlatFileConfig_Success() { + // Arrange + com.ingestion.dto.FlatFileConfig dtoConfig = new com.ingestion.dto.FlatFileConfig(); + dtoConfig.setDelimiter(','); + dtoConfig.setQuoteCharacter('"'); + dtoConfig.setEscapeCharacter('\\'); + dtoConfig.setHasHeader(true); + dtoConfig.setSkipEmptyLines(true); + dtoConfig.setTrimValues(true); + + // Act + FlatFileConfig result = dataIngestionService.createFlatFileConfig(dtoConfig); + + // Assert + assertNotNull(result); + assertEquals(dtoConfig.getDelimiter(), result.getDelimiter()); + assertEquals(dtoConfig.getQuoteCharacter(), result.getQuoteCharacter()); + assertEquals(dtoConfig.getEscapeCharacter(), result.getEscapeCharacter()); + assertEquals(dtoConfig.isHasHeader(), result.isHasHeader()); + assertEquals(dtoConfig.isSkipEmptyLines(), result.isSkipEmptyLines()); + assertEquals(dtoConfig.isTrimValues(), result.isTrimValues()); + } +} \ No newline at end of file diff --git 
a/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/FlatFileServiceTest.java b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/FlatFileServiceTest.java new file mode 100644 index 000000000..e375de773 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/FlatFileServiceTest.java @@ -0,0 +1,204 @@ +package com.ingestion.service; + +import com.ingestion.dto.FileUploadRequest; +import com.ingestion.model.FlatFileConfig; +import org.apache.commons.csv.CSVFormat; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.mock.web.MockMultipartFile; +import org.springframework.web.multipart.MultipartFile; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +public class FlatFileServiceTest { + + @InjectMocks + private FlatFileService flatFileService; + + private Path tempDir; + private MultipartFile testFile; + private FlatFileConfig testConfig; + + @BeforeEach + public void setUp() throws IOException { + tempDir = Files.createTempDirectory("flatfile-test"); + testFile = createTestCSVFile(); + testConfig = TestUtils.createTestFlatFileConfig(); + } + + @Test + public void testUploadFile_Success() throws IOException { + // Arrange + FileUploadRequest request = new FileUploadRequest(); + request.setFileName("test.csv"); + + // Act + Path uploadedPath = 
flatFileService.uploadFile(testFile, request); + + // Assert + assertTrue(Files.exists(uploadedPath)); + assertTrue(Files.isRegularFile(uploadedPath)); + assertEquals("test.csv", uploadedPath.getFileName().toString()); + } + + @Test + public void testParseCSVFile_Success() throws IOException { + // Arrange + Path filePath = flatFileService.uploadFile(testFile, new FileUploadRequest()); + + // Act + Map> result = flatFileService.parseCSVFile(filePath, testConfig); + + // Assert + assertNotNull(result); + assertTrue(result.containsKey("id")); + assertTrue(result.containsKey("name")); + assertTrue(result.containsKey("age")); + assertEquals(2, result.get("id").size()); + assertEquals(2, result.get("name").size()); + assertEquals(2, result.get("age").size()); + } + + @Test + public void testStreamCSVFile_Success() throws IOException { + // Arrange + Path filePath = flatFileService.uploadFile(testFile, new FileUploadRequest()); + AtomicLong rowCount = new AtomicLong(0); + + // Act + long processedRows = flatFileService.streamCSVFile(filePath, testConfig, (batch) -> { + rowCount.addAndGet(batch.size()); + return true; + }); + + // Assert + assertEquals(2, processedRows); + assertEquals(2, rowCount.get()); + } + + @Test + public void testCreateCSVFormat_Success() { + // Act + CSVFormat format = flatFileService.createCSVFormat(testConfig); + + // Assert + assertNotNull(format); + assertEquals(testConfig.getDelimiter(), format.getDelimiter()); + assertEquals(testConfig.getQuoteCharacter(), format.getQuoteCharacter()); + assertEquals(testConfig.getEscapeCharacter(), format.getEscapeCharacter()); + assertEquals(testConfig.isHasHeader(), format.getSkipHeaderRecord()); + } + + @Test + public void testParseCSVFile_InvalidFile() throws IOException { + // Arrange + MultipartFile invalidFile = new MockMultipartFile( + "test.csv", + "test.csv", + "text/csv", + "invalid,csv,data\nwithout,proper,format".getBytes() + ); + Path filePath = flatFileService.uploadFile(invalidFile, new 
FileUploadRequest()); + + // Act & Assert + assertThrows(IOException.class, () -> { + flatFileService.parseCSVFile(filePath, testConfig); + }); + } + + @Test + public void testStreamCSVFile_EmptyFile() throws IOException { + // Arrange + MultipartFile emptyFile = new MockMultipartFile( + "empty.csv", + "empty.csv", + "text/csv", + "".getBytes() + ); + Path filePath = flatFileService.uploadFile(emptyFile, new FileUploadRequest()); + AtomicLong rowCount = new AtomicLong(0); + + // Act + long processedRows = flatFileService.streamCSVFile(filePath, testConfig, (batch) -> { + rowCount.addAndGet(batch.size()); + return true; + }); + + // Assert + assertEquals(0, processedRows); + assertEquals(0, rowCount.get()); + } + + @Test + void testCSVWithQuotes() throws IOException { + String content = "header1,header2\n\"value,1\",\"value,2\"\nvalue3,value4"; + Path filePath = tempDir.resolve("test.csv"); + Files.write(filePath, content.getBytes()); + + Map> result = flatFileService.parseCSVFile(filePath, testConfig); + List data = result.get("data"); + + assertEquals(2, data.size()); + assertEquals("value,1,value,2", data.get(0)); + assertEquals("value3,value4", data.get(1)); + } + + @Test + void testCSVWithEmptyLines() throws IOException { + testConfig.setSkipEmptyLines(true); + String content = "header1,header2\nvalue1,value2\n\nvalue3,value4"; + Path filePath = tempDir.resolve("test.csv"); + Files.write(filePath, content.getBytes()); + + Map> result = flatFileService.parseCSVFile(filePath, testConfig); + List data = result.get("data"); + + assertEquals(2, data.size()); + assertEquals("value1,value2", data.get(0)); + assertEquals("value3,value4", data.get(1)); + } + + @Test + void testCSVWithoutHeader() throws IOException { + testConfig.setHasHeader(false); + String content = "value1,value2\nvalue3,value4"; + Path filePath = tempDir.resolve("test.csv"); + Files.write(filePath, content.getBytes()); + + Map> result = flatFileService.parseCSVFile(filePath, testConfig); + List 
headers = result.get("headers"); + List data = result.get("data"); + + assertTrue(headers.isEmpty()); + assertEquals(2, data.size()); + assertEquals("value1,value2", data.get(0)); + assertEquals("value3,value4", data.get(1)); + } + + private MultipartFile createTestCSVFile() { + String csvContent = "id,name,age\n1,John,30\n2,Jane,25"; + return new MockMultipartFile( + "test.csv", + "test.csv", + "text/csv", + csvContent.getBytes() + ); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/IngestionStatusServiceTest.java b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/IngestionStatusServiceTest.java new file mode 100644 index 000000000..7535f11c1 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/IngestionStatusServiceTest.java @@ -0,0 +1,171 @@ +package com.ingestion.service; + +import com.ingestion.entity.IngestionStatus; +import com.ingestion.repository.IngestionStatusRepository; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.time.LocalDateTime; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +public class IngestionStatusServiceTest { + + @InjectMocks + private IngestionStatusService ingestionStatusService; + + @Mock + private IngestionStatusRepository statusRepository; + + private IngestionStatus testStatus; + + @BeforeEach + void setUp() { + testStatus = new IngestionStatus(); + testStatus.setId("test-status-id"); + testStatus.setStatus(IngestionStatus.Status.IN_PROGRESS); + testStatus.setMessage("Processing"); + 
testStatus.setStartTime(LocalDateTime.now()); + testStatus.setRowsProcessed(0L); + } + + @Test + void testCreateStatus_Success() { + // Arrange + String fileName = "test.csv"; + when(statusRepository.save(any(IngestionStatus.class))).thenReturn(testStatus); + + // Act + String statusId = ingestionStatusService.createStatus(fileName); + + // Assert + assertNotNull(statusId); + assertEquals(testStatus.getId(), statusId); + verify(statusRepository).save(any(IngestionStatus.class)); + } + + @Test + void testGetStatus_Success() { + // Arrange + when(statusRepository.findById(testStatus.getId())).thenReturn(Optional.of(testStatus)); + + // Act + IngestionStatus result = ingestionStatusService.getStatus(testStatus.getId()); + + // Assert + assertNotNull(result); + assertEquals(testStatus.getId(), result.getId()); + assertEquals(testStatus.getStatus(), result.getStatus()); + assertEquals(testStatus.getMessage(), result.getMessage()); + verify(statusRepository).findById(testStatus.getId()); + } + + @Test + void testGetStatus_NotFound() { + // Arrange + String statusId = "non-existent-id"; + when(statusRepository.findById(statusId)).thenReturn(Optional.empty()); + + // Act & Assert + assertThrows(RuntimeException.class, () -> ingestionStatusService.getStatus(statusId)); + verify(statusRepository).findById(statusId); + } + + @Test + void testUpdateStatus_Success() { + // Arrange + when(statusRepository.findById(testStatus.getId())).thenReturn(Optional.of(testStatus)); + when(statusRepository.save(any(IngestionStatus.class))).thenReturn(testStatus); + + // Act + IngestionStatus result = ingestionStatusService.updateStatus( + testStatus.getId(), + IngestionStatus.Status.COMPLETED, + "Completed successfully", + 100L + ); + + // Assert + assertNotNull(result); + assertEquals(IngestionStatus.Status.COMPLETED, result.getStatus()); + assertEquals("Completed successfully", result.getMessage()); + assertEquals(100L, result.getRowsProcessed()); + assertNotNull(result.getEndTime()); + 
verify(statusRepository).findById(testStatus.getId()); + verify(statusRepository).save(any(IngestionStatus.class)); + } + + @Test + void testUpdateStatus_NotFound() { + // Arrange + String statusId = "non-existent-id"; + when(statusRepository.findById(statusId)).thenReturn(Optional.empty()); + + // Act & Assert + assertThrows(RuntimeException.class, () -> + ingestionStatusService.updateStatus( + statusId, + IngestionStatus.Status.COMPLETED, + "Completed", + 100L + )); + verify(statusRepository).findById(statusId); + verify(statusRepository, never()).save(any(IngestionStatus.class)); + } + + @Test + void testGetAllStatuses_Success() { + // Arrange + IngestionStatus status2 = new IngestionStatus(); + status2.setId("test-status-id-2"); + status2.setStatus(IngestionStatus.Status.COMPLETED); + + List statuses = Arrays.asList(testStatus, status2); + when(statusRepository.findAll()).thenReturn(statuses); + + // Act + List results = ingestionStatusService.getAllStatuses(); + + // Assert + assertNotNull(results); + assertEquals(2, results.size()); + assertEquals(testStatus.getId(), results.get(0).getId()); + assertEquals(status2.getId(), results.get(1).getId()); + verify(statusRepository).findAll(); + } + + @Test + void testDeleteStatus_Success() { + // Arrange + when(statusRepository.findById(testStatus.getId())).thenReturn(Optional.of(testStatus)); + + // Act + ingestionStatusService.deleteStatus(testStatus.getId()); + + // Assert + verify(statusRepository).findById(testStatus.getId()); + verify(statusRepository).delete(testStatus); + } + + @Test + void testDeleteStatus_NotFound() { + // Arrange + String statusId = "non-existent-id"; + when(statusRepository.findById(statusId)).thenReturn(Optional.empty()); + + // Act & Assert + assertThrows(RuntimeException.class, () -> ingestionStatusService.deleteStatus(statusId)); + verify(statusRepository).findById(statusId); + verify(statusRepository, never()).delete(any(IngestionStatus.class)); + } +} \ No newline at end of file 
diff --git a/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/TableMappingServiceTest.java b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/TableMappingServiceTest.java new file mode 100644 index 000000000..2c1a55788 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/TableMappingServiceTest.java @@ -0,0 +1,138 @@ +package com.ingestion.service; + +import com.ingestion.dto.TableMappingRequest; +import com.ingestion.entity.TableMapping; +import com.ingestion.repository.TableMappingRepository; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +public class TableMappingServiceTest { + + @InjectMocks + private TableMappingService tableMappingService; + + @Mock + private TableMappingRepository tableMappingRepository; + + private TableMappingRequest testRequest; + private TableMapping testTableMapping; + + @BeforeEach + void setUp() { + testRequest = TestUtils.createTestTableMappingRequest(); + testTableMapping = TestUtils.createTestTableMapping(); + } + + @Test + void testCreateTableMapping_Success() { + // Arrange + when(tableMappingRepository.save(any(TableMapping.class))).thenReturn(testTableMapping); + + // Act + TableMapping result = tableMappingService.createTableMapping(testRequest); + + // Assert + assertNotNull(result); + assertEquals(testTableMapping.getTableName(), result.getTableName()); + assertEquals(testTableMapping.getSchemaDefinition(), result.getSchemaDefinition()); + assertEquals(testTableMapping.getColumnMappings(), result.getColumnMappings()); + 
verify(tableMappingRepository).save(any(TableMapping.class)); + } + + @Test + void testGetTableMapping_Success() { + // Arrange + when(tableMappingRepository.findByTableName(testTableMapping.getTableName())) + .thenReturn(Optional.of(testTableMapping)); + + // Act + TableMapping result = tableMappingService.getTableMapping(testTableMapping.getTableName()); + + // Assert + assertNotNull(result); + assertEquals(testTableMapping.getTableName(), result.getTableName()); + assertEquals(testTableMapping.getSchemaDefinition(), result.getSchemaDefinition()); + assertEquals(testTableMapping.getColumnMappings(), result.getColumnMappings()); + verify(tableMappingRepository).findByTableName(testTableMapping.getTableName()); + } + + @Test + void testGetTableMapping_NotFound() { + // Arrange + String tableName = "non_existent_table"; + when(tableMappingRepository.findByTableName(tableName)).thenReturn(Optional.empty()); + + // Act & Assert + assertThrows(RuntimeException.class, () -> tableMappingService.getTableMapping(tableName)); + verify(tableMappingRepository).findByTableName(tableName); + } + + @Test + void testUpdateTableMapping_Success() { + // Arrange + when(tableMappingRepository.findByTableName(testTableMapping.getTableName())) + .thenReturn(Optional.of(testTableMapping)); + when(tableMappingRepository.save(any(TableMapping.class))).thenReturn(testTableMapping); + + // Act + TableMapping result = tableMappingService.updateTableMapping(testTableMapping.getTableName(), testRequest); + + // Assert + assertNotNull(result); + assertEquals(testTableMapping.getTableName(), result.getTableName()); + assertEquals(testTableMapping.getSchemaDefinition(), result.getSchemaDefinition()); + assertEquals(testTableMapping.getColumnMappings(), result.getColumnMappings()); + verify(tableMappingRepository).findByTableName(testTableMapping.getTableName()); + verify(tableMappingRepository).save(any(TableMapping.class)); + } + + @Test + void testUpdateTableMapping_NotFound() { + // Arrange + 
String tableName = "non_existent_table"; + when(tableMappingRepository.findByTableName(tableName)).thenReturn(Optional.empty()); + + // Act & Assert + assertThrows(RuntimeException.class, () -> + tableMappingService.updateTableMapping(tableName, testRequest)); + verify(tableMappingRepository).findByTableName(tableName); + verify(tableMappingRepository, never()).save(any(TableMapping.class)); + } + + @Test + void testDeleteTableMapping_Success() { + // Arrange + when(tableMappingRepository.findByTableName(testTableMapping.getTableName())) + .thenReturn(Optional.of(testTableMapping)); + + // Act + tableMappingService.deleteTableMapping(testTableMapping.getTableName()); + + // Assert + verify(tableMappingRepository).findByTableName(testTableMapping.getTableName()); + verify(tableMappingRepository).delete(testTableMapping); + } + + @Test + void testDeleteTableMapping_NotFound() { + // Arrange + String tableName = "non_existent_table"; + when(tableMappingRepository.findByTableName(tableName)).thenReturn(Optional.empty()); + + // Act & Assert + assertThrows(RuntimeException.class, () -> tableMappingService.deleteTableMapping(tableName)); + verify(tableMappingRepository).findByTableName(tableName); + verify(tableMappingRepository, never()).delete(any(TableMapping.class)); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/TestUtils.java b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/TestUtils.java new file mode 100644 index 000000000..6717d988e --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/test/java/com/ingestion/service/TestUtils.java @@ -0,0 +1,96 @@ +package com.ingestion.service; + +import com.ingestion.dto.ConnectionRequest; +import com.ingestion.dto.FlatFileConfig; +import com.ingestion.dto.IngestionRequest; +import com.ingestion.dto.TableMappingRequest; +import com.ingestion.entity.TableMapping; +import org.springframework.mock.web.MockMultipartFile; 
+import org.springframework.web.multipart.MultipartFile; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + +public class TestUtils { + + public static ConnectionRequest createTestConnectionRequest() { + ConnectionRequest request = new ConnectionRequest(); + request.setHost("localhost"); + request.setPort(8123); + request.setDatabase("testdb"); + request.setUsername("default"); + request.setPassword(""); + return request; + } + + public static TableMappingRequest createTestTableMappingRequest() { + TableMappingRequest request = new TableMappingRequest(); + request.setTableName("test_table"); + + Map schemaDefinition = new HashMap<>(); + schemaDefinition.put("id", "UInt32"); + schemaDefinition.put("name", "String"); + schemaDefinition.put("age", "UInt8"); + request.setSchemaDefinition(schemaDefinition); + + Map columnMappings = new HashMap<>(); + columnMappings.put("id", "id"); + columnMappings.put("name", "name"); + columnMappings.put("age", "age"); + request.setColumnMappings(columnMappings); + + return request; + } + + public static FlatFileConfig createTestFlatFileConfig() { + FlatFileConfig config = new FlatFileConfig(); + config.setDelimiter(','); + config.setQuoteCharacter('"'); + config.setEscapeCharacter('\\'); + config.setHasHeader(true); + config.setSkipEmptyLines(true); + config.setTrimValues(true); + return config; + } + + public static IngestionRequest createTestIngestionRequest() { + IngestionRequest request = new IngestionRequest(); + request.setTableName("test_table"); + request.setConnectionId("test_connection"); + request.setFileConfig(createTestFlatFileConfig()); + return request; + } + + public static MultipartFile createTestCSVFile() throws IOException { + String content = "id,name,age\n1,John,30\n2,Jane,25\n3,Bob,40"; + return new MockMultipartFile( + "file", + "test.csv", + "text/csv", + new 
ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)) + ); + } + + public static TableMapping createTestTableMapping() { + TableMapping tableMapping = new TableMapping(); + tableMapping.setId(1L); + tableMapping.setTableName("test_table"); + + Map schemaDefinition = new HashMap<>(); + schemaDefinition.put("id", "UInt32"); + schemaDefinition.put("name", "String"); + schemaDefinition.put("age", "UInt8"); + tableMapping.setSchemaDefinition(schemaDefinition); + + Map columnMappings = new HashMap<>(); + columnMappings.put("id", "id"); + columnMappings.put("name", "name"); + columnMappings.put("age", "age"); + tableMapping.setColumnMappings(columnMappings); + + return tableMapping; + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/backend/src/test/resources/application-test.yml b/clickhouse-flatfile-ingestion/backend/src/test/resources/application-test.yml new file mode 100644 index 000000000..2aee44fb4 --- /dev/null +++ b/clickhouse-flatfile-ingestion/backend/src/test/resources/application-test.yml @@ -0,0 +1,19 @@ +spring: + datasource: + url: jdbc:h2:mem:testdb;DB_CLOSE_DELAY=-1;DB_CLOSE_ON_EXIT=FALSE + username: sa + password: + driver-class-name: org.h2.Driver + jpa: + database-platform: org.hibernate.dialect.H2Dialect + hibernate: + ddl-auto: create-drop + show-sql: true + +clickhouse: + test: + host: localhost + port: 8123 + database: testdb + username: default + password: \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/.eslintrc.js b/clickhouse-flatfile-ingestion/frontend/.eslintrc.js new file mode 100644 index 000000000..cd4d2b885 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/.eslintrc.js @@ -0,0 +1,68 @@ +module.exports = { + env: { + browser: true, + es2021: true, + node: true, + }, + extends: [ + 'eslint:recommended', + 'plugin:react/recommended', + 'plugin:react-hooks/recommended', + 'plugin:security/recommended', + ], + parserOptions: { + ecmaFeatures: { + jsx: true, + }, + 
ecmaVersion: 12, + sourceType: 'module', + }, + plugins: ['react', 'react-hooks', 'security'], + rules: { + // Security rules + 'security/detect-object-injection': 'error', + 'security/detect-non-literal-regexp': 'error', + 'security/detect-unsafe-regex': 'error', + 'security/detect-buffer-noassert': 'error', + 'security/detect-child-process': 'error', + 'security/detect-disable-mustache-escape': 'error', + 'security/detect-eval-with-expression': 'error', + 'security/detect-no-csrf-before-method-override': 'error', + 'security/detect-non-literal-fs-filename': 'error', + 'security/detect-non-literal-require': 'error', + 'security/detect-possible-timing-attacks': 'error', + 'security/detect-pseudoRandomBytes': 'error', + + // React rules + 'react/prop-types': 'error', + 'react/jsx-uses-react': 'error', + 'react/jsx-uses-vars': 'error', + 'react/no-danger': 'error', + 'react/no-unsafe': ['error', { checkAliases: true }], + 'react-hooks/rules-of-hooks': 'error', + 'react-hooks/exhaustive-deps': 'warn', + + // Performance rules + 'react/no-unused-prop-types': 'error', + 'react/no-unused-state': 'error', + 'react/no-will-update-set-state': 'error', + 'react/no-array-index-key': 'warn', + + // General rules + 'no-console': ['warn', { allow: ['warn', 'error'] }], + 'no-debugger': 'warn', + 'no-unused-vars': ['error', { argsIgnorePattern: '^_' }], + 'prefer-const': 'error', + 'no-var': 'error', + 'eqeqeq': ['error', 'always'], + 'curly': ['error', 'all'], + 'indent': ['error', 2], + 'quotes': ['error', 'single'], + 'semi': ['error', 'always'], + }, + settings: { + react: { + version: 'detect', + }, + }, +}; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/package.json b/clickhouse-flatfile-ingestion/frontend/package.json new file mode 100644 index 000000000..bd6c58c46 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/package.json @@ -0,0 +1,83 @@ +{ + "name": "clickhouse-flatfile-ingestion-frontend", + "version": "1.0.0", + "private": 
true, + "dependencies": { + "@emotion/react": "^11.11.0", + "@emotion/styled": "^11.11.0", + "@mui/icons-material": "^5.11.16", + "@mui/material": "^5.13.0", + "@mui/x-data-grid": "^6.5.0", + "axios": "^1.4.0", + "jwt-decode": "^3.1.2", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-dropzone": "^14.2.3", + "react-router-dom": "^6.11.1", + "react-scripts": "5.0.1", + "react-toastify": "^9.1.3", + "web-vitals": "^3.3.1" + }, + "devDependencies": { + "@testing-library/jest-dom": "^5.16.5", + "@testing-library/react": "^14.0.0", + "@testing-library/user-event": "^14.4.3", + "@types/jest": "^29.5.1", + "@types/node": "^20.1.4", + "@types/react": "^18.2.6", + "@types/react-dom": "^18.2.4", + "jsdom": "^22.1.0", + "msw": "^1.2.1", + "vitest": "^0.31.1", + "vite": "^4.3.5", + "@vitejs/plugin-react": "^4.0.0", + "eslint": "^8.40.0", + "eslint-plugin-react": "^7.32.2", + "eslint-plugin-react-hooks": "^4.6.0", + "eslint-plugin-security": "^1.7.1", + "prettier": "^2.8.8", + "husky": "^8.0.3", + "lint-staged": "^13.2.2" + }, + "scripts": { + "start": "react-scripts start", + "build": "react-scripts build", + "test": "vitest", + "test:coverage": "vitest run --coverage", + "test:watch": "vitest watch", + "lint": "eslint src --ext .js,.jsx,.ts,.tsx", + "lint:fix": "eslint src --ext .js,.jsx,.ts,.tsx --fix", + "format": "prettier --write \"src/**/*.{js,jsx,ts,tsx,json,css,scss,md}\"", + "prepare": "husky install", + "analyze": "source-map-explorer 'build/static/js/*.js'", + "security-check": "npm audit", + "type-check": "tsc --noEmit" + }, + "eslintConfig": { + "extends": [ + "react-app", + "react-app/jest" + ] + }, + "browserslist": { + "production": [ + ">0.2%", + "not dead", + "not op_mini all" + ], + "development": [ + "last 1 chrome version", + "last 1 firefox version", + "last 1 safari version" + ] + }, + "lint-staged": { + "src/**/*.{js,jsx,ts,tsx}": [ + "eslint --fix", + "prettier --write" + ], + "src/**/*.{json,css,scss,md}": [ + "prettier --write" + ] + } +} \ 
No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/App.js b/clickhouse-flatfile-ingestion/frontend/src/App.js new file mode 100644 index 000000000..51cb299ea --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/App.js @@ -0,0 +1,35 @@ +import React from 'react'; +import { ThemeProvider, createTheme } from '@mui/material/styles'; +import CssBaseline from '@mui/material/CssBaseline'; +import { Container, Box } from '@mui/material'; +import { ToastContainer } from 'react-toastify'; +import 'react-toastify/dist/ReactToastify.css'; +import IngestionTool from './components/IngestionTool'; + +const theme = createTheme({ + palette: { + mode: 'light', + primary: { + main: '#1976d2', + }, + secondary: { + main: '#dc004e', + }, + }, +}); + +function App() { + return ( + + + + + + + + + + ); +} + +export default App; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/ClickHouseConfig.js b/clickhouse-flatfile-ingestion/frontend/src/components/ClickHouseConfig.js new file mode 100644 index 000000000..ac4cbb9a2 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/ClickHouseConfig.js @@ -0,0 +1,139 @@ +import React, { useState } from 'react'; +import { + Box, + Card, + CardContent, + TextField, + Button, + Grid, + Typography, + Alert, + CircularProgress +} from '@mui/material'; +import clickhouseService from '../services/clickhouseService'; + +const ClickHouseConfig = ({ onConfigValid }) => { + const [config, setConfig] = useState({ + host: '', + port: 8123, + database: '', + username: '', + password: '' + }); + + const [loading, setLoading] = useState(false); + const [error, setError] = useState(''); + const [success, setSuccess] = useState(''); + + const handleChange = (e) => { + const { name, value } = e.target; + setConfig(prev => ({ + ...prev, + [name]: value + })); + }; + + const handleTestConnection = async () => { + setLoading(true); + setError(''); + setSuccess(''); + 
+ try { + await clickhouseService.testConnection(config); + setSuccess('Connection successful!'); + onConfigValid(config); + } catch (error) { + setError(error.message); + } finally { + setLoading(false); + } + }; + + return ( + + + + ClickHouse Configuration + + + + + + + + + + + + + + + + + + + + + + {error && ( + + {error} + + )} + {success && ( + + {success} + + )} + + + + + ); +}; + +export default ClickHouseConfig; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/ColumnSelection.js b/clickhouse-flatfile-ingestion/frontend/src/components/ColumnSelection.js new file mode 100644 index 000000000..1e866d3d3 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/ColumnSelection.js @@ -0,0 +1,134 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Typography, + List, + ListItem, + Checkbox, + ListItemText, + ListItemIcon, + Button, + CircularProgress, + Alert, + FormControlLabel, +} from '@mui/material'; +import axios from 'axios'; +import { toast } from 'react-toastify'; + +function ColumnSelection({ sourceType, connectionConfig, selectedTable, onSelect }) { + const [columns, setColumns] = useState([]); + const [selectedColumns, setSelectedColumns] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + + useEffect(() => { + const fetchColumns = async () => { + try { + setLoading(true); + setError(null); + + if (sourceType === 'clickhouse') { + const response = await axios.get(`/api/ingestion/tables/${selectedTable}/columns`, { + data: connectionConfig, + }); + setColumns(response.data); + } else if (sourceType === 'flatfile') { + const formData = new FormData(); + formData.append('file', connectionConfig.file); + formData.append('delimiter', ','); + + const response = await axios.post('/api/ingestion/file/columns', formData); + setColumns(response.data); + } + } catch (err) { + setError(err.response?.data || 'Failed to fetch 
columns'); + toast.error('Failed to fetch columns'); + } finally { + setLoading(false); + } + }; + + fetchColumns(); + }, [sourceType, connectionConfig, selectedTable]); + + const handleToggle = (column) => { + setSelectedColumns((prev) => { + const isSelected = prev.includes(column); + if (isSelected) { + return prev.filter((c) => c !== column); + } else { + return [...prev, column]; + } + }); + }; + + const handleSelectAll = (event) => { + if (event.target.checked) { + setSelectedColumns(columns); + } else { + setSelectedColumns([]); + } + }; + + if (loading) { + return ( + + + + ); + } + + if (error) { + return ( + + {error} + + ); + } + + return ( + + + Select Columns + + + 0 && selectedColumns.length < columns.length} + onChange={handleSelectAll} + /> + } + label="Select All" + /> + + + {columns.map((column) => ( + + + handleToggle(column)} + /> + + + + ))} + + + + + + + ); +} + +export default ColumnSelection; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/ConnectionForm.js b/clickhouse-flatfile-ingestion/frontend/src/components/ConnectionForm.js new file mode 100644 index 000000000..75b2c3be7 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/ConnectionForm.js @@ -0,0 +1,167 @@ +import React, { useState } from 'react'; +import { + Box, + Typography, + TextField, + Button, + FormControlLabel, + Switch, + Grid, +} from '@mui/material'; +import { useDropzone } from 'react-dropzone'; +import { toast } from 'react-toastify'; + +function ConnectionForm({ sourceType, onSubmit }) { + const [formData, setFormData] = useState({ + host: '', + port: '', + database: '', + user: '', + jwtToken: '', + ssl: true, + sslMode: 'STRICT', + }); + + const [file, setFile] = useState(null); + + const { getRootProps, getInputProps } = useDropzone({ + accept: { + 'text/csv': ['.csv'], + 'text/plain': ['.txt'], + }, + maxFiles: 1, + onDrop: (acceptedFiles) => { + setFile(acceptedFiles[0]); + toast.success('File 
uploaded successfully'); + }, + }); + + const handleChange = (e) => { + const { name, value, checked } = e.target; + setFormData((prev) => ({ + ...prev, + [name]: name === 'ssl' ? checked : value, + })); + }; + + const handleSubmit = (e) => { + e.preventDefault(); + if (sourceType === 'clickhouse') { + onSubmit(formData); + } else if (sourceType === 'flatfile') { + if (!file) { + toast.error('Please upload a file'); + return; + } + onSubmit({ file }); + } + }; + + return ( + + + {sourceType === 'clickhouse' ? 'ClickHouse Connection' : 'File Upload'} + + + {sourceType === 'clickhouse' ? ( + + + + + + + + + + + + + + + + + + + } + label="Use SSL" + /> + + + ) : ( + + + + {file ? file.name : 'Drag and drop a file here, or click to select'} + + + )} + + + + + + ); +} + +export default ConnectionForm; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/DataPreview.js b/clickhouse-flatfile-ingestion/frontend/src/components/DataPreview.js new file mode 100644 index 000000000..88ccd3d10 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/DataPreview.js @@ -0,0 +1,248 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Typography, + Paper, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + TablePagination, + TextField, + Button, + IconButton, + Tooltip, + CircularProgress, + Alert, + Grid, + FormControl, + InputLabel, + Select, + MenuItem +} from '@mui/material'; +import FilterListIcon from '@mui/icons-material/FilterList'; +import SortIcon from '@mui/icons-material/Sort'; +import DownloadIcon from '@mui/icons-material/Download'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import ingestionService from '../services/ingestionService'; +import { toast } from 'react-toastify'; +import { formatNumber } from '../utils/formatters'; + +function DataPreview({ sourceType, connectionConfig, selectedTable, selectedColumns, onPreview }) { + const [data, 
setData] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const [page, setPage] = useState(0); + const [rowsPerPage, setRowsPerPage] = useState(10); + const [filters, setFilters] = useState({}); + const [sortConfig, setSortConfig] = useState({ field: '', direction: 'asc' }); + const [totalRecords, setTotalRecords] = useState(0); + const [showFilters, setShowFilters] = useState(false); + + useEffect(() => { + fetchData(); + }, [sourceType, connectionConfig, selectedTable, selectedColumns, page, rowsPerPage, filters, sortConfig]); + + const fetchData = async () => { + try { + setLoading(true); + setError(null); + const response = await ingestionService.getPreviewData({ + sourceType, + connectionConfig, + table: selectedTable, + columns: selectedColumns, + page, + pageSize: rowsPerPage, + filters, + sortField: sortConfig.field, + sortDirection: sortConfig.direction + }); + setData(response.data); + setTotalRecords(response.total); + onPreview(response.data); + } catch (error) { + setError(error.message); + toast.error('Failed to fetch preview data: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleChangePage = (event, newPage) => { + setPage(newPage); + }; + + const handleChangeRowsPerPage = (event) => { + setRowsPerPage(parseInt(event.target.value, 10)); + setPage(0); + }; + + const handleSort = (field) => { + setSortConfig({ + field, + direction: sortConfig.field === field && sortConfig.direction === 'asc' ? 
'desc' : 'asc' + }); + }; + + const handleFilterChange = (field, value) => { + setFilters(prev => ({ + ...prev, + [field]: value + })); + setPage(0); + }; + + const handleExport = async () => { + try { + const response = await ingestionService.exportPreviewData({ + sourceType, + connectionConfig, + table: selectedTable, + columns: selectedColumns, + filters, + sortField: sortConfig.field, + sortDirection: sortConfig.direction + }); + + // Create download link + const url = window.URL.createObjectURL(new Blob([response])); + const link = document.createElement('a'); + link.href = url; + link.setAttribute('download', `${selectedTable}_preview.csv`); + document.body.appendChild(link); + link.click(); + link.remove(); + + toast.success('Data exported successfully'); + } catch (error) { + toast.error('Failed to export data: ' + error.message); + } + }; + + const handleRefresh = () => { + fetchData(); + }; + + if (loading) { + return ( + + + + ); + } + + if (error) { + return ( + + {error} + + ); + } + + return ( + + + Data Preview + + + + + + + Total Records: {formatNumber(totalRecords)} + + + + + + setShowFilters(!showFilters)}> + + + + + + + + + + + + + + + + + + + {showFilters && ( + + + {selectedColumns.map((column) => ( + + handleFilterChange(column, e.target.value)} + size="small" + /> + + ))} + + + )} + + + + + + {selectedColumns.map((column) => ( + + + {column} + handleSort(column)} + > + + + + + ))} + + + + {data.map((row, index) => ( + + {selectedColumns.map((column) => ( + {row[column]} + ))} + + ))} + +
+
+ + +
+ ); +} + +export default DataPreview; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/FileUpload.js b/clickhouse-flatfile-ingestion/frontend/src/components/FileUpload.js new file mode 100644 index 000000000..39b0ced5b --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/FileUpload.js @@ -0,0 +1,210 @@ +import React, { useState, useRef } from 'react'; +import { + Box, + Card, + CardContent, + Typography, + Button, + Grid, + Alert, + CircularProgress, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Paper, + IconButton, + Tooltip, + LinearProgress +} from '@mui/material'; +import DeleteIcon from '@mui/icons-material/Delete'; +import VisibilityIcon from '@mui/icons-material/Visibility'; +import CloudUploadIcon from '@mui/icons-material/CloudUpload'; +import fileService from '../services/fileService'; + +const FileUpload = ({ onFileSelected }) => { + const [files, setFiles] = useState([]); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(''); + const [success, setSuccess] = useState(''); + const [previewData, setPreviewData] = useState(null); + const [previewLoading, setPreviewLoading] = useState(false); + const [uploadProgress, setUploadProgress] = useState(0); + const fileInputRef = useRef(null); + + const handleFileChange = (event) => { + const selectedFiles = Array.from(event.target.files); + if (selectedFiles.length > 0) { + setFiles(selectedFiles); + } + }; + + const handleUpload = async () => { + if (files.length === 0) { + setError('Please select a file to upload'); + return; + } + + setLoading(true); + setError(''); + setSuccess(''); + setUploadProgress(0); + + try { + const formData = new FormData(); + formData.append('file', files[0]); + + const response = await fileService.upload(formData, (progressEvent) => { + const progress = Math.round((progressEvent.loaded * 100) / progressEvent.total); + setUploadProgress(progress); 
+ }); + + setSuccess('File uploaded successfully!'); + setFiles([]); + if (onFileSelected) { + onFileSelected(response); + } + } catch (error) { + setError('Failed to upload file: ' + error.message); + } finally { + setLoading(false); + setUploadProgress(0); + } + }; + + const handlePreview = async (fileId) => { + setPreviewLoading(true); + setPreviewData(null); + setError(''); + + try { + const response = await fileService.getPreview(fileId); + setPreviewData(response); + } catch (error) { + setError('Failed to load preview: ' + error.message); + } finally { + setPreviewLoading(false); + } + }; + + const handleDelete = async (fileId) => { + if (!window.confirm('Are you sure you want to delete this file?')) { + return; + } + + setLoading(true); + try { + await fileService.delete(fileId); + setSuccess('File deleted successfully!'); + if (previewData && previewData.id === fileId) { + setPreviewData(null); + } + } catch (error) { + setError('Failed to delete file: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleBrowseClick = () => { + fileInputRef.current.click(); + }; + + return ( + + + + File Upload + + + + + + + {files.length > 0 && ( + + Selected: {files[0].name} ({(files[0].size / 1024).toFixed(2)} KB) + + )} + {uploadProgress > 0 && uploadProgress < 100 && ( + + + + {uploadProgress}% + + + )} + + + {error && ( + + {error} + + )} + {success && ( + + {success} + + )} + + {previewData && ( + + + File Preview: {previewData.filename} + + {previewLoading ? ( + + + + ) : ( + + + + + {previewData.headers.map((header, index) => ( + {header} + ))} + + + + {previewData.rows.map((row, rowIndex) => ( + + {row.map((cell, cellIndex) => ( + {cell} + ))} + + ))} + +
+
+ )} +
+ )} +
+
+ ); +}; + +export default FileUpload; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/Header.js b/clickhouse-flatfile-ingestion/frontend/src/components/Header.js new file mode 100644 index 000000000..a23e3b9d8 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/Header.js @@ -0,0 +1,119 @@ +import React, { useState } from 'react'; +import { Link as RouterLink } from 'react-router-dom'; +import { + AppBar, + Toolbar, + Typography, + Button, + IconButton, + Box, + Menu, + MenuItem, + Avatar, + useTheme, + useMediaQuery, +} from '@mui/material'; +import { + Menu as MenuIcon, + Dashboard as DashboardIcon, + Storage as StorageIcon, + Settings as SettingsIcon, + AccountCircle as AccountCircleIcon, +} from '@mui/icons-material'; + +const Header = () => { + const theme = useTheme(); + const isMobile = useMediaQuery(theme.breakpoints.down('sm')); + const [anchorEl, setAnchorEl] = useState(null); + + const handleMenu = (event) => { + setAnchorEl(event.currentTarget); + }; + + const handleClose = () => { + setAnchorEl(null); + }; + + return ( + + + + + Data Ingestion + + + {!isMobile && ( + + + + + + )} + + + + + + Profile + My account + Logout + + + + ); +}; + +export default Header; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/IngestionConfig.js b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionConfig.js new file mode 100644 index 000000000..20d13374f --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionConfig.js @@ -0,0 +1,476 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Card, + CardContent, + Typography, + TextField, + Button, + Grid, + FormControlLabel, + Switch, + Alert, + CircularProgress, + Divider, + FormControl, + InputLabel, + Select, + MenuItem, + InputAdornment, + Tooltip, + IconButton +} from '@mui/material'; +import HelpOutlineIcon from '@mui/icons-material/HelpOutline'; 
+import SaveIcon from '@mui/icons-material/Save'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import ingestionService from '../services/ingestionService'; + +const IngestionConfig = ({ mappingId }) => { + const [config, setConfig] = useState({ + batchSize: 1000, + maxRetries: 3, + timeout: 30000, + validateData: true, + skipInvalidRecords: true, + compressionEnabled: false, + compressionLevel: 'medium', + deduplicationEnabled: false, + deduplicationWindow: 24, + errorThreshold: 100, + errorAction: 'stop', + loggingLevel: 'info' + }); + + const [loading, setLoading] = useState(true); + const [saving, setSaving] = useState(false); + const [error, setError] = useState(''); + const [success, setSuccess] = useState(''); + + useEffect(() => { + if (mappingId) { + loadConfig(); + } + }, [mappingId]); + + const loadConfig = async () => { + if (!mappingId) return; + + try { + setLoading(true); + const response = await ingestionService.getConfig(mappingId); + setConfig(response); + setError(''); + } catch (error) { + setError('Failed to load configuration: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleChange = (event) => { + const { name, value, checked } = event.target; + setConfig({ + ...config, + [name]: event.target.type === 'checkbox' ? checked : value + }); + }; + + const handleNumberChange = (event) => { + const { name, value } = event.target; + setConfig({ + ...config, + [name]: value === '' ? 
'' : Number(value) + }); + }; + + const handleSave = async () => { + if (!mappingId) return; + + try { + setSaving(true); + await ingestionService.updateConfig(mappingId, config); + setSuccess('Configuration saved successfully'); + setError(''); + + // Clear success message after 3 seconds + setTimeout(() => { + setSuccess(''); + }, 3000); + } catch (error) { + setError('Failed to save configuration: ' + error.message); + } finally { + setSaving(false); + } + }; + + const handleReset = () => { + loadConfig(); + }; + + const getTooltipText = (field) => { + const tooltips = { + batchSize: 'Number of records to process in a single batch', + maxRetries: 'Maximum number of retry attempts for failed operations', + timeout: 'Timeout in milliseconds for operations', + validateData: 'Validate data before ingestion', + skipInvalidRecords: 'Skip records that fail validation', + compressionEnabled: 'Enable compression for data transfer', + compressionLevel: 'Level of compression to apply', + deduplicationEnabled: 'Enable deduplication of records', + deduplicationWindow: 'Time window in hours for deduplication', + errorThreshold: 'Maximum number of errors before taking action', + errorAction: 'Action to take when error threshold is reached', + loggingLevel: 'Level of detail in logs' + }; + + return tooltips[field] || ''; + }; + + if (loading) { + return ( + + + + + + + + ); + } + + return ( + + + + + Ingestion Configuration + + + + + + + + + + + + {error && ( + + {error} + + )} + + {success && ( + + {success} + + )} + + + + + Performance Settings + + + + + + + + + + ) + }} + /> + + + + + + + + + ) + }} + /> + + + + + + + + + ) + }} + /> + + + Logging Level + + + + + + + Data Processing + + + + } + label={ + + Validate Data + + + + + + + } + /> + + + } + label={ + + Skip Invalid Records + + + + + + + } + /> + + + + + Compression + + + + } + label={ + + Enable Compression + + + + + + + } + /> + + {config.compressionEnabled && ( + + Compression Level + + + )} + + + + + 
Deduplication + + + + } + label={ + + Enable Deduplication + + + + + + + } + /> + + {config.deduplicationEnabled && ( + + + + + + + + ) + }} + /> + )} + + + + + Error Handling + + + + + + + + + + ) + }} + /> + + + Error Action + + + + + + + ); +}; + +export default IngestionConfig; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/IngestionLogs.js b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionLogs.js new file mode 100644 index 000000000..384c64163 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionLogs.js @@ -0,0 +1,263 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Card, + CardContent, + Typography, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Paper, + CircularProgress, + Alert, + IconButton, + Tooltip, + FormControl, + InputLabel, + Select, + MenuItem, + TextField, + Button, + Pagination +} from '@mui/material'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import FilterListIcon from '@mui/icons-material/FilterList'; +import ingestionService from '../services/ingestionService'; + +const IngestionLogs = ({ mappingId }) => { + const [logs, setLogs] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(''); + const [page, setPage] = useState(1); + const [totalPages, setTotalPages] = useState(1); + const [pageSize, setPageSize] = useState(10); + const [logLevel, setLogLevel] = useState('all'); + const [searchTerm, setSearchTerm] = useState(''); + const [showFilters, setShowFilters] = useState(false); + + useEffect(() => { + if (mappingId) { + loadLogs(); + } + }, [mappingId, page, pageSize, logLevel]); + + const loadLogs = async () => { + if (!mappingId) return; + + try { + setLoading(true); + const options = { + page, + pageSize, + level: logLevel !== 'all' ? 
logLevel : undefined, + search: searchTerm || undefined + }; + + const response = await ingestionService.getLogs(mappingId, options); + setLogs(response.logs || []); + setTotalPages(Math.ceil(response.total / pageSize)); + setError(''); + } catch (error) { + setError('Failed to load logs: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleRefresh = () => { + loadLogs(); + }; + + const handlePageChange = (event, value) => { + setPage(value); + }; + + const handlePageSizeChange = (event) => { + setPageSize(event.target.value); + setPage(1); // Reset to first page when changing page size + }; + + const handleLogLevelChange = (event) => { + setLogLevel(event.target.value); + setPage(1); // Reset to first page when changing filter + }; + + const handleSearchChange = (event) => { + setSearchTerm(event.target.value); + }; + + const handleSearch = () => { + setPage(1); // Reset to first page when searching + loadLogs(); + }; + + const toggleFilters = () => { + setShowFilters(!showFilters); + }; + + const getLogLevelColor = (level) => { + switch (level.toLowerCase()) { + case 'error': + return 'error'; + case 'warn': + case 'warning': + return 'warning'; + case 'info': + return 'info'; + case 'debug': + return 'default'; + default: + return 'default'; + } + }; + + const formatDate = (dateString) => { + if (!dateString) return 'N/A'; + const date = new Date(dateString); + return date.toLocaleString(); + }; + + return ( + + + + + Ingestion Logs + + + + + + + + + + + + + + + + {showFilters && ( + + + + Log Level + + + + + + + + + + )} + + {error && ( + + {error} + + )} + + {loading ? ( + + + + ) : ( + <> + {logs.length > 0 ? ( + <> + + + + + Timestamp + Level + Message + + + + {logs.map((log, index) => ( + + {formatDate(log.timestamp)} + + + {log.level} + + + {log.message} + + ))} + +
+
+ + + + Page Size + + + + + + + ) : ( + + No logs found + + )} + + )} +
+
+ ); +}; + + \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/IngestionStats.js b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionStats.js new file mode 100644 index 000000000..d57ba989c --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionStats.js @@ -0,0 +1,344 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Card, + CardContent, + Typography, + Grid, + CircularProgress, + Alert, + IconButton, + Tooltip, + Divider, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Paper, + Button, + Chip +} from '@mui/material'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import DownloadIcon from '@mui/icons-material/Download'; +import ingestionService from '../services/ingestionService'; + +const IngestionStats = ({ mappingId }) => { + const [stats, setStats] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(''); + const [timeRange, setTimeRange] = useState('24h'); // Default to last 24 hours + + useEffect(() => { + if (mappingId) { + loadStats(); + } + }, [mappingId, timeRange]); + + const loadStats = async () => { + if (!mappingId) return; + + try { + setLoading(true); + const response = await ingestionService.getStats(mappingId, { timeRange }); + setStats(response); + setError(''); + } catch (error) { + setError('Failed to load statistics: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleRefresh = () => { + loadStats(); + }; + + const handleTimeRangeChange = (range) => { + setTimeRange(range); + }; + + const handleDownloadReport = () => { + if (!stats) return; + + // Create a CSV string from the stats data + const csvContent = generateCSV(stats); + + // Create a blob and download link + const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' }); + const url = URL.createObjectURL(blob); + const link = 
document.createElement('a'); + link.setAttribute('href', url); + link.setAttribute('download', `ingestion-stats-${mappingId}-${new Date().toISOString()}.csv`); + link.style.visibility = 'hidden'; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + }; + + const generateCSV = (data) => { + // Simple CSV generation for the stats data + const headers = ['Metric', 'Value']; + const rows = [ + ['Total Records Processed', data.totalRecords], + ['Success Rate', `${data.successRate}%`], + ['Average Processing Time', `${data.avgProcessingTime}ms`], + ['Failed Records', data.failedRecords], + ['Last Successful Ingestion', data.lastSuccessfulIngestion], + ['Last Failed Ingestion', data.lastFailedIngestion || 'N/A'] + ]; + + return [ + headers.join(','), + ...rows.map(row => row.join(',')) + ].join('\n'); + }; + + const formatDate = (dateString) => { + if (!dateString) return 'N/A'; + const date = new Date(dateString); + return date.toLocaleString(); + }; + + const getStatusColor = (status) => { + switch (status) { + case 'success': + return 'success'; + case 'failed': + return 'error'; + case 'in_progress': + return 'info'; + default: + return 'default'; + } + }; + + if (loading) { + return ( + + + + + + + + ); + } + + return ( + + + + + Ingestion Statistics + + + + + + + + + + + + + + + + {error && ( + + {error} + + )} + + {stats ? ( + <> + + + + + + + + + + + + Total Records Processed + + + {stats.totalRecords.toLocaleString()} + + + + + + + + + Success Rate + + = 95 ? 'success.main' : 'warning.main'}> + {stats.successRate}% + + + + + + + + + Average Processing Time + + + {stats.avgProcessingTime}ms + + + + + + + + + + Recent Ingestion Jobs + + + + + + + Job ID + Start Time + End Time + Records Processed + Status + + + + {stats.recentJobs && stats.recentJobs.length > 0 ? 
( + stats.recentJobs.map((job) => ( + + {job.id} + {formatDate(job.startTime)} + {formatDate(job.endTime)} + {job.recordsProcessed.toLocaleString()} + + + + + )) + ) : ( + + + No recent jobs found + + + )} + +
+
+ + + + + Performance Metrics + + + + + + + + Throughput (Records/Second) + + + {stats.throughput.toLocaleString()} + + + + + + + + + Failed Records + + + {stats.failedRecords.toLocaleString()} + + + + + + + + + + Last Ingestion Details + + + + + + + + Last Successful Ingestion + + + {formatDate(stats.lastSuccessfulIngestion)} + + + + + + + + + Last Failed Ingestion + + + {formatDate(stats.lastFailedIngestion) || 'N/A'} + + + + + + + ) : ( + + No statistics available + + )} +
+
+ ); +}; + +export default IngestionStats; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/IngestionStatus.js b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionStatus.js new file mode 100644 index 000000000..6434be77a --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionStatus.js @@ -0,0 +1,242 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Card, + CardContent, + Typography, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Paper, + Chip, + CircularProgress, + Alert, + IconButton, + Tooltip +} from '@mui/material'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import StopIcon from '@mui/icons-material/Stop'; +import PlayArrowIcon from '@mui/icons-material/PlayArrow'; +import ingestionService from '../services/ingestionService'; + +const statusColors = { + RUNNING: 'primary', + COMPLETED: 'success', + FAILED: 'error', + STOPPED: 'warning', + PENDING: 'default' +}; + +const IngestionStatus = ({ mappingId, onStatusChange }) => { + const [status, setStatus] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(''); + const [recentJobs, setRecentJobs] = useState([]); + const [pollingInterval, setPollingInterval] = useState(null); + + useEffect(() => { + loadStatus(); + + // Set up polling for status updates + const interval = setInterval(() => { + loadStatus(); + }, 5000); // Poll every 5 seconds + + setPollingInterval(interval); + + return () => { + if (interval) { + clearInterval(interval); + } + }; + }, [mappingId]); + + const loadStatus = async () => { + if (!mappingId) return; + + try { + setLoading(true); + const response = await ingestionService.getStatus(mappingId); + setStatus(response.status); + setRecentJobs(response.recentJobs || []); + setError(''); + + if (onStatusChange) { + onStatusChange(response.status); + } + } catch (error) { + setError('Failed 
to load ingestion status: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleStartIngestion = async () => { + try { + setLoading(true); + await ingestionService.startIngestion(mappingId); + await loadStatus(); + } catch (error) { + setError('Failed to start ingestion: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleStopIngestion = async () => { + try { + setLoading(true); + await ingestionService.stopIngestion(mappingId); + await loadStatus(); + } catch (error) { + setError('Failed to stop ingestion: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleRefresh = () => { + loadStatus(); + }; + + const formatDate = (dateString) => { + if (!dateString) return 'N/A'; + const date = new Date(dateString); + return date.toLocaleString(); + }; + + return ( + + + + + Ingestion Status + + + + + + + + {status?.state === 'RUNNING' && ( + + + + + + )} + {status?.state !== 'RUNNING' && ( + + + + + + )} + + + + {error && ( + + {error} + + )} + + {loading ? ( + + + + ) : ( + <> + {status && ( + + + Current Status: + + + + + Last Updated: {formatDate(status.lastUpdated)} + + + {status.progress && ( + + + Progress: {status.progress.percentage}% + + + Processed: {status.progress.processedRecords} records + + {status.progress.errors > 0 && ( + + Errors: {status.progress.errors} + + )} + + )} + + )} + + + Recent Jobs + + + + + + Job ID + Status + Started + Completed + Records + + + + {recentJobs.length > 0 ? ( + recentJobs.map((job) => ( + + {job.id} + + + + {formatDate(job.startTime)} + {formatDate(job.endTime)} + {job.processedRecords || 0} + + )) + ) : ( + + + No recent jobs found + + + )} + +
+
+ + )} +
+
+ ); +}; + +export default IngestionStatus; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/IngestionTool.js b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionTool.js new file mode 100644 index 000000000..f7aa7ea07 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/IngestionTool.js @@ -0,0 +1,255 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Paper, + Typography, + Stepper, + Step, + StepLabel, + Button, + Alert, + CircularProgress +} from '@mui/material'; +import { toast } from 'react-toastify'; +import SourceSelection from './SourceSelection'; +import ConnectionForm from './ConnectionForm'; +import SchemaSelection from './SchemaSelection'; +import ColumnSelection from './ColumnSelection'; +import DataPreview from './DataPreview'; +import ProgressDisplay from './ProgressDisplay'; +import ingestionService from '../services/ingestionService'; +import dataTypeMapper from '../utils/dataTypeMapper'; + +const steps = [ + 'Select Source', + 'Configure Connection', + 'Select Schema', + 'Select Columns', + 'Preview Data', + 'Execute Ingestion' +]; + +function IngestionTool() { + const [activeStep, setActiveStep] = useState(0); + const [sourceType, setSourceType] = useState(null); + const [connectionConfig, setConnectionConfig] = useState(null); + const [selectedTable, setSelectedTable] = useState(null); + const [availableColumns, setAvailableColumns] = useState([]); + const [selectedColumns, setSelectedColumns] = useState([]); + const [previewData, setPreviewData] = useState(null); + const [progress, setProgress] = useState(0); + const [isProcessing, setIsProcessing] = useState(false); + const [recordCount, setRecordCount] = useState(null); + const [error, setError] = useState(null); + const [jobId, setJobId] = useState(null); + + useEffect(() => { + if (jobId) { + const progressInterval = setInterval(async () => { + try { + const progressData = await 
ingestionService.getProgress(jobId); + setProgress(progressData.progress); + if (progressData.status === 'COMPLETED') { + clearInterval(progressInterval); + setIsProcessing(false); + toast.success('Ingestion completed successfully!'); + } else if (progressData.status === 'FAILED') { + clearInterval(progressInterval); + setIsProcessing(false); + setError(progressData.error); + toast.error('Ingestion failed: ' + progressData.error); + } + } catch (error) { + clearInterval(progressInterval); + setIsProcessing(false); + setError(error.message); + toast.error('Failed to fetch progress: ' + error.message); + } + }, 1000); + + return () => clearInterval(progressInterval); + } + }, [jobId]); + + const handleNext = () => { + setActiveStep((prevStep) => prevStep + 1); + }; + + const handleBack = () => { + setActiveStep((prevStep) => prevStep - 1); + }; + + const handleSourceSelect = (source) => { + setSourceType(source); + handleNext(); + }; + + const handleConnectionSubmit = async (config) => { + try { + setConnectionConfig(config); + // Get record count + const count = await ingestionService.getRecordCount(sourceType, config); + setRecordCount(count); + handleNext(); + } catch (error) { + setError(error.message); + toast.error('Failed to get record count: ' + error.message); + } + }; + + const handleTableSelect = async (table) => { + try { + setSelectedTable(table); + // Get schema information + const schema = await ingestionService.getSchema(sourceType, { + ...connectionConfig, + table + }); + setAvailableColumns(schema.columns); + handleNext(); + } catch (error) { + setError(error.message); + toast.error('Failed to get schema: ' + error.message); + } + }; + + const handleColumnSelect = (columns) => { + setSelectedColumns(columns); + handleNext(); + }; + + const handlePreview = async (data) => { + setPreviewData(data); + handleNext(); + }; + + const handleExecute = async () => { + setIsProcessing(true); + setError(null); + try { + const config = { + sourceType, + 
connectionConfig, + selectedTable, + selectedColumns + }; + + let response; + if (sourceType === 'clickhouse') { + response = await ingestionService.exportToFile(config); + } else { + response = await ingestionService.importFromFile(config); + } + + setJobId(response.jobId); + toast.success('Ingestion started successfully!'); + } catch (error) { + setIsProcessing(false); + setError(error.message); + toast.error('Failed to start ingestion: ' + error.message); + } + }; + + const getStepContent = (step) => { + switch (step) { + case 0: + return ; + case 1: + return ( + + ); + case 2: + return ( + + ); + case 3: + return ( + + ); + case 4: + return ( + + ); + case 5: + return ( + + ); + default: + return 'Unknown step'; + } + }; + + return ( + + + Data Ingestion Tool + + + {steps.map((label) => ( + + {label} + + ))} + + + {error && ( + + {error} + + )} + {getStepContent(activeStep)} + + + {activeStep === steps.length - 1 ? ( + + ) : ( + + )} + + + + ); +} + +export default IngestionTool; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/ProgressDisplay.js b/clickhouse-flatfile-ingestion/frontend/src/components/ProgressDisplay.js new file mode 100644 index 000000000..42067533d --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/ProgressDisplay.js @@ -0,0 +1,87 @@ +import React from 'react'; +import { + Box, + Typography, + LinearProgress, + Paper, + Button, + Grid, + Card, + CardContent +} from '@mui/material'; +import { formatNumber } from '../utils/formatters'; + +function ProgressDisplay({ progress, isProcessing, onExecute, recordCount }) { + return ( + + + Data Ingestion Progress + + + + + + + + Record Count + + + {recordCount ? formatNumber(recordCount) : 'N/A'} + + + + + + + + + Progress + + + {progress}% + + + + + + + + + + {isProcessing ? 'Processing...' : 'Ready to start ingestion'} + + + + {progress}% Complete + + + + + + {isProcessing ? ( + 'Please wait while the data is being processed...' 
+ ) : ( + 'Click the button below to start the ingestion process.' + )} + + + + + + + + + ); +} + +export default ProgressDisplay; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/SchemaMapping.js b/clickhouse-flatfile-ingestion/frontend/src/components/SchemaMapping.js new file mode 100644 index 000000000..e2ffb0f07 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/SchemaMapping.js @@ -0,0 +1,216 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Paper, + Typography, + Grid, + FormControl, + InputLabel, + Select, + MenuItem, + Button, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + IconButton, + Alert, + CircularProgress +} from '@mui/material'; +import DeleteIcon from '@mui/icons-material/Delete'; +import AddIcon from '@mui/icons-material/Add'; +import clickhouseService from '../services/clickhouseService'; + +const SchemaMapping = ({ + clickhouseConfig, + selectedTable, + fileSchema, + onMappingComplete +}) => { + const [clickhouseSchema, setClickhouseSchema] = useState([]); + const [mappings, setMappings] = useState([]); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(''); + const [success, setSuccess] = useState(''); + + useEffect(() => { + if (clickhouseConfig && selectedTable) { + loadClickHouseSchema(); + } + }, [clickhouseConfig, selectedTable]); + + const loadClickHouseSchema = async () => { + setLoading(true); + setError(''); + try { + const response = await clickhouseService.getTableSchema( + clickhouseConfig, + selectedTable + ); + setClickhouseSchema(response.data); + } catch (err) { + setError('Failed to load ClickHouse schema'); + } finally { + setLoading(false); + } + }; + + const handleAddMapping = () => { + setMappings([ + ...mappings, + { sourceColumn: '', targetColumn: '', dataType: '' } + ]); + }; + + const handleRemoveMapping = (index) => { + const newMappings = mappings.filter((_, 
i) => i !== index); + setMappings(newMappings); + }; + + const handleMappingChange = (index, field, value) => { + const newMappings = mappings.map((mapping, i) => { + if (i === index) { + return { ...mapping, [field]: value }; + } + return mapping; + }); + setMappings(newMappings); + }; + + const handleSaveMapping = () => { + // Validate mappings + const isValid = mappings.every(mapping => + mapping.sourceColumn && mapping.targetColumn + ); + + if (!isValid) { + setError('Please fill in all mapping fields'); + return; + } + + setSuccess('Schema mapping saved successfully'); + if (onMappingComplete) { + onMappingComplete(mappings); + } + }; + + return ( + + + Schema Mapping + + + {loading ? ( + + + + ) : ( + <> + + + + + Source Column + Target Column + Data Type + Actions + + + + {mappings.map((mapping, index) => ( + + + + + + + + + + + + + + + + + + handleRemoveMapping(index)} + color="error" + > + + + + + ))} + +
+
+ + + + + + + {error && ( + + {error} + + )} + + {success && ( + + {success} + + )} + + )} +
+ ); +}; + +export default SchemaMapping; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/SchemaSelection.js b/clickhouse-flatfile-ingestion/frontend/src/components/SchemaSelection.js new file mode 100644 index 000000000..3aef72092 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/SchemaSelection.js @@ -0,0 +1,81 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Typography, + List, + ListItem, + ListItemButton, + ListItemText, + CircularProgress, + Alert, +} from '@mui/material'; +import axios from 'axios'; +import { toast } from 'react-toastify'; + +function SchemaSelection({ sourceType, connectionConfig, onSelect }) { + const [tables, setTables] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + + useEffect(() => { + const fetchTables = async () => { + try { + setLoading(true); + setError(null); + + if (sourceType === 'clickhouse') { + const response = await axios.post('/api/ingestion/connect', connectionConfig); + setTables(response.data); + } else if (sourceType === 'flatfile') { + // For flat files, we'll use the file name as the table name + setTables([{ name: connectionConfig.file.name }]); + } + } catch (err) { + setError(err.response?.data || 'Failed to fetch tables'); + toast.error('Failed to fetch tables'); + } finally { + setLoading(false); + } + }; + + fetchTables(); + }, [sourceType, connectionConfig]); + + if (loading) { + return ( + + + + ); + } + + if (error) { + return ( + + {error} + + ); + } + + return ( + + + Select {sourceType === 'clickhouse' ? 
'Table' : 'File'} + + + {tables.map((table) => ( + + onSelect(table.name)}> + + + + ))} + + + ); +} + +export default SchemaSelection; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/SourceSelection.js b/clickhouse-flatfile-ingestion/frontend/src/components/SourceSelection.js new file mode 100644 index 000000000..1290c3168 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/SourceSelection.js @@ -0,0 +1,66 @@ +import React from 'react'; +import { + Box, + Typography, + Card, + CardContent, + CardActionArea, + Grid, +} from '@mui/material'; +import StorageIcon from '@mui/icons-material/Storage'; +import DescriptionIcon from '@mui/icons-material/Description'; + +function SourceSelection({ onSelect }) { + const sources = [ + { + id: 'clickhouse', + name: 'ClickHouse Database', + description: 'Import data from ClickHouse database', + icon: , + }, + { + id: 'flatfile', + name: 'Flat File', + description: 'Import data from CSV or text file', + icon: , + }, + ]; + + return ( + + + Select Data Source + + + {sources.map((source) => ( + + + onSelect(source.id)}> + + + {source.icon} + + {source.name} + + + {source.description} + + + + + + + ))} + + + ); +} + +export default SourceSelection; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/TableJoin.js b/clickhouse-flatfile-ingestion/frontend/src/components/TableJoin.js new file mode 100644 index 000000000..febd3e3f5 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/TableJoin.js @@ -0,0 +1,288 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Typography, + Paper, + Grid, + TextField, + Button, + IconButton, + Select, + MenuItem, + FormControl, + InputLabel, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + CircularProgress, + Alert +} from '@mui/material'; +import AddIcon from '@mui/icons-material/Add'; +import DeleteIcon from 
'@mui/icons-material/Delete'; +import ingestionService from '../services/ingestionService'; +import { toast } from 'react-toastify'; + +function TableJoin({ sourceType, connectionConfig, onJoinComplete }) { + const [tables, setTables] = useState([]); + const [selectedTables, setSelectedTables] = useState([]); + const [joinConditions, setJoinConditions] = useState([]); + const [previewData, setPreviewData] = useState(null); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + useEffect(() => { + fetchTables(); + }, [sourceType, connectionConfig]); + + const fetchTables = async () => { + try { + setLoading(true); + const response = await ingestionService.getSchema(sourceType, connectionConfig); + setTables(response.tables || []); + } catch (error) { + setError(error.message); + toast.error('Failed to fetch tables: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleAddTable = () => { + if (selectedTables.length < tables.length) { + setSelectedTables([...selectedTables, { table: '', columns: [] }]); + } + }; + + const handleRemoveTable = (index) => { + const newSelectedTables = selectedTables.filter((_, i) => i !== index); + setSelectedTables(newSelectedTables); + setJoinConditions(joinConditions.filter((_, i) => i !== index)); + }; + + const handleTableChange = (index, table) => { + const newSelectedTables = [...selectedTables]; + newSelectedTables[index] = { ...newSelectedTables[index], table }; + setSelectedTables(newSelectedTables); + }; + + const handleAddJoinCondition = () => { + if (selectedTables.length >= 2) { + setJoinConditions([...joinConditions, { leftTable: '', leftColumn: '', rightTable: '', rightColumn: '', type: 'INNER' }]); + } + }; + + const handleJoinConditionChange = (index, field, value) => { + const newConditions = [...joinConditions]; + newConditions[index] = { ...newConditions[index], [field]: value }; + setJoinConditions(newConditions); + }; + + const handlePreview = 
async () => { + try { + setLoading(true); + setError(null); + const response = await ingestionService.previewJoin({ + sourceType, + connectionConfig, + tables: selectedTables, + joinConditions + }); + setPreviewData(response.data); + toast.success('Preview generated successfully'); + } catch (error) { + setError(error.message); + toast.error('Failed to generate preview: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleComplete = () => { + onJoinComplete({ + tables: selectedTables, + joinConditions + }); + }; + + return ( + + + Multi-Table Join Configuration + + + {error && ( + + {error} + + )} + + + + Selected Tables + + {selectedTables.map((table, index) => ( + + + + Select Table + + + + + handleRemoveTable(index)} color="error"> + + + + + ))} + + + + + + Join Conditions + + {joinConditions.map((condition, index) => ( + + + + Left Table + + + + + handleJoinConditionChange(index, 'leftColumn', e.target.value)} + /> + + + + Join Type + + + + + handleJoinConditionChange(index, 'rightColumn', e.target.value)} + /> + + + + Right Table + + + + + ))} + + + + {previewData && ( + + + Preview Data + + + + + + {Object.keys(previewData[0] || {}).map((column) => ( + {column} + ))} + + + + {previewData.slice(0, 5).map((row, index) => ( + + {Object.values(row).map((value, i) => ( + {value} + ))} + + ))} + +
+
+
+ )} + + + + + +
+ ); +} + +export default TableJoin; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/TableMapping.js b/clickhouse-flatfile-ingestion/frontend/src/components/TableMapping.js new file mode 100644 index 000000000..a57fcc5ed --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/TableMapping.js @@ -0,0 +1,240 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Card, + CardContent, + TextField, + Button, + Grid, + Typography, + Alert, + CircularProgress, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Paper, + IconButton, + Tooltip +} from '@mui/material'; +import DeleteIcon from '@mui/icons-material/Delete'; +import EditIcon from '@mui/icons-material/Edit'; +import clickhouseService from '../services/clickhouseService'; + +const TableMapping = ({ config }) => { + const [mappings, setMappings] = useState([]); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(''); + const [success, setSuccess] = useState(''); + const [editingMapping, setEditingMapping] = useState(null); + const [formData, setFormData] = useState({ + name: '', + sourceFile: '', + targetTable: '', + columnMappings: [] + }); + + useEffect(() => { + if (config) { + loadMappings(); + } + }, [config]); + + const loadMappings = async () => { + setLoading(true); + try { + const response = await clickhouseService.getMappings(config); + setMappings(response); + } catch (error) { + setError('Failed to load mappings: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleChange = (e) => { + const { name, value } = e.target; + setFormData(prev => ({ + ...prev, + [name]: value + })); + }; + + const handleSubmit = async (e) => { + e.preventDefault(); + setLoading(true); + setError(''); + setSuccess(''); + + try { + if (editingMapping) { + await clickhouseService.saveMapping(config, { + ...formData, + id: editingMapping.id + }); + 
setSuccess('Mapping updated successfully!'); + } else { + await clickhouseService.saveMapping(config, formData); + setSuccess('Mapping created successfully!'); + } + loadMappings(); + resetForm(); + } catch (error) { + setError('Failed to save mapping: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleDelete = async (mappingId) => { + if (!window.confirm('Are you sure you want to delete this mapping?')) { + return; + } + + setLoading(true); + try { + await clickhouseService.deleteMapping(config, mappingId); + setSuccess('Mapping deleted successfully!'); + loadMappings(); + } catch (error) { + setError('Failed to delete mapping: ' + error.message); + } finally { + setLoading(false); + } + }; + + const handleEdit = (mapping) => { + setEditingMapping(mapping); + setFormData({ + name: mapping.name, + sourceFile: mapping.sourceFile, + targetTable: mapping.targetTable, + columnMappings: mapping.columnMappings + }); + }; + + const resetForm = () => { + setEditingMapping(null); + setFormData({ + name: '', + sourceFile: '', + targetTable: '', + columnMappings: [] + }); + }; + + return ( + + + + Table Mappings + + +
+ + + + + + + + + + + + + + {error && ( + + {error} + + )} + {success && ( + + {success} + + )} + + {editingMapping && ( + + )} + +
+ + + + + + + Name + Source File + Target Table + Actions + + + + {mappings.map((mapping) => ( + + {mapping.name} + {mapping.sourceFile} + {mapping.targetTable} + + + handleEdit(mapping)}> + + + + + handleDelete(mapping.id)}> + + + + + + ))} + +
+
+
+
+
+ ); +}; + +export default TableMapping; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/ColumnSelection.test.js b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/ColumnSelection.test.js new file mode 100644 index 000000000..eaf183ff0 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/ColumnSelection.test.js @@ -0,0 +1,211 @@ +import React from 'react'; +import { render, screen, fireEvent, waitFor } from '@testing-library/react'; +import { vi } from 'vitest'; +import ColumnSelection from '../ColumnSelection'; +import { toast } from 'react-toastify'; +import axios from 'axios'; + +vi.mock('axios'); + +describe('ColumnSelection Component', () => { + const mockOnColumnsSelect = vi.fn(); + const mockConnectionConfig = { + host: 'localhost', + port: 8123, + database: 'test_db', + user: 'test_user', + password: 'test_pass' + }; + + const mockColumns = [ + { name: 'id', type: 'Int32', nullable: false }, + { name: 'name', type: 'String', nullable: true }, + { name: 'email', type: 'String', nullable: true }, + { name: 'created_at', type: 'DateTime', nullable: false } + ]; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('renders loading state initially', () => { + render( + + ); + + expect(screen.getByText('Loading columns...')).toBeInTheDocument(); + }); + + test('renders columns from ClickHouse correctly', async () => { + axios.get.mockResolvedValueOnce({ data: mockColumns }); + + render( + + ); + + await waitFor(() => { + mockColumns.forEach(column => { + expect(screen.getByText(column.name)).toBeInTheDocument(); + expect(screen.getByText(column.type)).toBeInTheDocument(); + expect(screen.getByText(column.nullable ? 
'Nullable' : 'Required')).toBeInTheDocument(); + }); + }); + }); + + test('renders columns from flat file correctly', async () => { + const mockFileColumns = [ + { name: 'id', type: 'number' }, + { name: 'name', type: 'string' } + ]; + axios.get.mockResolvedValueOnce({ data: mockFileColumns }); + + render( + + ); + + await waitFor(() => { + mockFileColumns.forEach(column => { + expect(screen.getByText(column.name)).toBeInTheDocument(); + expect(screen.getByText(column.type)).toBeInTheDocument(); + }); + }); + }); + + test('handles individual column selection', async () => { + axios.get.mockResolvedValueOnce({ data: mockColumns }); + + render( + + ); + + await waitFor(() => { + const checkbox = screen.getByLabelText('id'); + fireEvent.click(checkbox); + }); + + expect(mockOnColumnsSelect).toHaveBeenCalledWith(['id']); + }); + + test('handles select all functionality', async () => { + axios.get.mockResolvedValueOnce({ data: mockColumns }); + + render( + + ); + + await waitFor(() => { + const selectAllCheckbox = screen.getByLabelText('Select All'); + fireEvent.click(selectAllCheckbox); + }); + + expect(mockOnColumnsSelect).toHaveBeenCalledWith(mockColumns.map(col => col.name)); + }); + + test('handles API error for ClickHouse columns', async () => { + const errorMessage = 'Failed to fetch columns'; + axios.get.mockRejectedValueOnce(new Error(errorMessage)); + + render( + + ); + + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith(`Error fetching columns: ${errorMessage}`); + }); + expect(screen.getByText('No columns available')).toBeInTheDocument(); + }); + + test('handles API error for flat file columns', async () => { + const errorMessage = 'Failed to fetch file columns'; + axios.get.mockRejectedValueOnce(new Error(errorMessage)); + + render( + + ); + + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith(`Error fetching columns: ${errorMessage}`); + }); + expect(screen.getByText('No columns available')).toBeInTheDocument(); + }); + + 
test('displays empty state when no columns are available', async () => { + axios.get.mockResolvedValueOnce({ data: [] }); + + render( + + ); + + await waitFor(() => { + expect(screen.getByText('No columns available')).toBeInTheDocument(); + }); + }); + + test('refreshes columns when table selection changes', async () => { + const { rerender } = render( + + ); + + await waitFor(() => { + expect(axios.get).toHaveBeenCalledTimes(1); + }); + + rerender( + + ); + + await waitFor(() => { + expect(axios.get).toHaveBeenCalledTimes(2); + }); + }); +}); \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/ConnectionForm.test.js b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/ConnectionForm.test.js new file mode 100644 index 000000000..637e1e55e --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/ConnectionForm.test.js @@ -0,0 +1,167 @@ +import React from 'react'; +import { render, screen, fireEvent, waitFor } from '@testing-library/react'; +import { vi } from 'vitest'; +import ConnectionForm from '../ConnectionForm'; +import { toast } from 'react-toastify'; + +describe('ConnectionForm Component', () => { + const mockOnSubmit = vi.fn(); + const mockOnFileUpload = vi.fn(); + + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('renders ClickHouse connection form when source is clickhouse', () => { + render(); + + expect(screen.getByLabelText('Host')).toBeInTheDocument(); + expect(screen.getByLabelText('Port')).toBeInTheDocument(); + expect(screen.getByLabelText('Database')).toBeInTheDocument(); + expect(screen.getByLabelText('User')).toBeInTheDocument(); + expect(screen.getByLabelText('Password')).toBeInTheDocument(); + expect(screen.getByText('Connect')).toBeInTheDocument(); + }); + + test('renders file upload form when source is flatfile', () => { + render(); + + expect(screen.getByText('Drag and drop a file here, or click to select')).toBeInTheDocument(); + 
expect(screen.getByText('Supported formats: CSV, JSON, Excel')).toBeInTheDocument(); + }); + + test('handles file drop correctly', async () => { + render(); + + const file = new File(['test data'], 'test.csv', { type: 'text/csv' }); + const dropzone = screen.getByText('Drag and drop a file here, or click to select'); + + // Simulate file drop + fireEvent.drop(dropzone, { + dataTransfer: { + files: [file] + } + }); + + await waitFor(() => { + expect(mockOnFileUpload).toHaveBeenCalledWith(file); + }); + }); + + test('handles file selection through click correctly', async () => { + render(); + + const file = new File(['test data'], 'test.csv', { type: 'text/csv' }); + const input = screen.getByTestId('file-input'); + + // Simulate file selection + fireEvent.change(input, { + target: { + files: [file] + } + }); + + await waitFor(() => { + expect(mockOnFileUpload).toHaveBeenCalledWith(file); + }); + }); + + test('validates file type correctly', async () => { + render(); + + const invalidFile = new File(['test data'], 'test.txt', { type: 'text/plain' }); + const dropzone = screen.getByText('Drag and drop a file here, or click to select'); + + // Simulate invalid file drop + fireEvent.drop(dropzone, { + dataTransfer: { + files: [invalidFile] + } + }); + + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith('Invalid file type. 
Please upload a CSV, JSON, or Excel file.'); + }); + expect(mockOnFileUpload).not.toHaveBeenCalled(); + }); + + test('handles ClickHouse connection submission correctly', async () => { + render(); + + // Fill connection form + fireEvent.change(screen.getByLabelText('Host'), { target: { value: 'localhost' } }); + fireEvent.change(screen.getByLabelText('Port'), { target: { value: '8123' } }); + fireEvent.change(screen.getByLabelText('Database'), { target: { value: 'test_db' } }); + fireEvent.change(screen.getByLabelText('User'), { target: { value: 'test_user' } }); + fireEvent.change(screen.getByLabelText('Password'), { target: { value: 'test_pass' } }); + + // Submit form + fireEvent.click(screen.getByText('Connect')); + + await waitFor(() => { + expect(mockOnSubmit).toHaveBeenCalledWith({ + host: 'localhost', + port: 8123, + database: 'test_db', + user: 'test_user', + password: 'test_pass' + }); + }); + }); + + test('validates required fields in ClickHouse form', async () => { + render(); + + // Submit form without filling required fields + fireEvent.click(screen.getByText('Connect')); + + await waitFor(() => { + expect(screen.getByText('Host is required')).toBeInTheDocument(); + expect(screen.getByText('Port is required')).toBeInTheDocument(); + expect(screen.getByText('Database is required')).toBeInTheDocument(); + expect(screen.getByText('User is required')).toBeInTheDocument(); + expect(screen.getByText('Password is required')).toBeInTheDocument(); + }); + expect(mockOnSubmit).not.toHaveBeenCalled(); + }); + + test('validates port number format', async () => { + render(); + + // Fill form with invalid port + fireEvent.change(screen.getByLabelText('Host'), { target: { value: 'localhost' } }); + fireEvent.change(screen.getByLabelText('Port'), { target: { value: 'invalid' } }); + fireEvent.change(screen.getByLabelText('Database'), { target: { value: 'test_db' } }); + fireEvent.change(screen.getByLabelText('User'), { target: { value: 'test_user' } }); + 
fireEvent.change(screen.getByLabelText('Password'), { target: { value: 'test_pass' } }); + + // Submit form + fireEvent.click(screen.getByText('Connect')); + + await waitFor(() => { + expect(screen.getByText('Port must be a valid number')).toBeInTheDocument(); + }); + expect(mockOnSubmit).not.toHaveBeenCalled(); + }); + + test('handles file upload errors correctly', async () => { + const mockError = new Error('Upload failed'); + mockOnFileUpload.mockRejectedValue(mockError); + + render(); + + const file = new File(['test data'], 'test.csv', { type: 'text/csv' }); + const dropzone = screen.getByText('Drag and drop a file here, or click to select'); + + // Simulate file drop + fireEvent.drop(dropzone, { + dataTransfer: { + files: [file] + } + }); + + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith('Failed to upload file: Upload failed'); + }); + }); +}); \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/DataPreview.test.js b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/DataPreview.test.js new file mode 100644 index 000000000..b52beb5e5 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/DataPreview.test.js @@ -0,0 +1,229 @@ +import React from 'react'; +import { render, screen, fireEvent, waitFor } from '@testing-library/react'; +import { vi } from 'vitest'; +import DataPreview from '../DataPreview'; +import { toast } from 'react-toastify'; +import axios from 'axios'; + +vi.mock('axios'); + +describe('DataPreview Component', () => { + const mockConnectionConfig = { + host: 'localhost', + port: 8123, + database: 'test_db', + user: 'test_user', + password: 'test_pass' + }; + + const mockPreviewData = { + columns: ['id', 'name', 'email', 'created_at'], + data: [ + { id: 1, name: 'John Doe', email: 'john@example.com', created_at: '2024-01-01' }, + { id: 2, name: 'Jane Smith', email: 'jane@example.com', created_at: '2024-01-02' }, + { id: 3, name: 'Bob 
Johnson', email: 'bob@example.com', created_at: '2024-01-03' } + ], + totalRows: 3 + }; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('renders loading state initially', () => { + render( + + ); + + expect(screen.getByText('Loading preview data...')).toBeInTheDocument(); + }); + + test('renders preview data from ClickHouse correctly', async () => { + axios.get.mockResolvedValueOnce({ data: mockPreviewData }); + + render( + + ); + + await waitFor(() => { + mockPreviewData.columns.forEach(column => { + expect(screen.getByText(column)).toBeInTheDocument(); + }); + + mockPreviewData.data.forEach(row => { + expect(screen.getByText(row.id.toString())).toBeInTheDocument(); + expect(screen.getByText(row.name)).toBeInTheDocument(); + expect(screen.getByText(row.email)).toBeInTheDocument(); + }); + }); + }); + + test('renders preview data from flat file correctly', async () => { + axios.get.mockResolvedValueOnce({ data: mockPreviewData }); + + render( + + ); + + await waitFor(() => { + mockPreviewData.columns.forEach(column => { + expect(screen.getByText(column)).toBeInTheDocument(); + }); + + mockPreviewData.data.forEach(row => { + expect(screen.getByText(row.id.toString())).toBeInTheDocument(); + expect(screen.getByText(row.name)).toBeInTheDocument(); + expect(screen.getByText(row.email)).toBeInTheDocument(); + }); + }); + }); + + test('handles pagination correctly', async () => { + const mockPaginatedData = { + ...mockPreviewData, + data: mockPreviewData.data.slice(0, 2), + totalRows: 5 + }; + axios.get.mockResolvedValueOnce({ data: mockPaginatedData }); + + render( + + ); + + await waitFor(() => { + expect(screen.getByText('1-2 of 5')).toBeInTheDocument(); + }); + + const nextButton = screen.getByLabelText('Next page'); + fireEvent.click(nextButton); + + await waitFor(() => { + expect(axios.get).toHaveBeenCalledTimes(2); + }); + }); + + test('handles rows per page change', async () => { + axios.get.mockResolvedValueOnce({ data: mockPreviewData }); + + 
render( + + ); + + await waitFor(() => { + const rowsPerPageSelect = screen.getByLabelText('Rows per page:'); + fireEvent.change(rowsPerPageSelect, { target: { value: '25' } }); + }); + + await waitFor(() => { + expect(axios.get).toHaveBeenCalledTimes(2); + }); + }); + + test('handles API error for ClickHouse preview', async () => { + const errorMessage = 'Failed to fetch preview data'; + axios.get.mockRejectedValueOnce(new Error(errorMessage)); + + render( + + ); + + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith(`Error fetching preview data: ${errorMessage}`); + }); + expect(screen.getByText('No preview data available')).toBeInTheDocument(); + }); + + test('handles API error for flat file preview', async () => { + const errorMessage = 'Failed to fetch file preview'; + axios.get.mockRejectedValueOnce(new Error(errorMessage)); + + render( + + ); + + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith(`Error fetching preview data: ${errorMessage}`); + }); + expect(screen.getByText('No preview data available')).toBeInTheDocument(); + }); + + test('displays empty state when no preview data is available', async () => { + axios.get.mockResolvedValueOnce({ data: { columns: [], data: [], totalRows: 0 } }); + + render( + + ); + + await waitFor(() => { + expect(screen.getByText('No preview data available')).toBeInTheDocument(); + }); + }); + + test('refreshes preview when selected columns change', async () => { + const { rerender } = render( + + ); + + await waitFor(() => { + expect(axios.get).toHaveBeenCalledTimes(1); + }); + + rerender( + + ); + + await waitFor(() => { + expect(axios.get).toHaveBeenCalledTimes(2); + }); + }); +}); \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/IngestionTool.test.js b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/IngestionTool.test.js new file mode 100644 index 000000000..7273bcf27 --- /dev/null +++ 
b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/IngestionTool.test.js @@ -0,0 +1,196 @@ +import React from 'react'; +import { render, screen, fireEvent, waitFor } from '@testing-library/react'; +import { vi } from 'vitest'; +import IngestionTool from '../IngestionTool'; +import ingestionService from '../../services/ingestionService'; +import { toast } from 'react-toastify'; + +// Mock the ingestion service +vi.mock('../../services/ingestionService'); + +describe('IngestionTool Component', () => { + const mockConnectionConfig = { + host: 'localhost', + port: 8123, + database: 'test_db', + user: 'test_user', + password: 'test_pass' + }; + + const mockTableData = { + tables: [ + { name: 'table1', columns: ['col1', 'col2'] }, + { name: 'table2', columns: ['col3', 'col4'] } + ] + }; + + const mockSchemaData = { + columns: [ + { name: 'col1', type: 'String' }, + { name: 'col2', type: 'Int32' } + ] + }; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('renders source selection step initially', () => { + render(); + expect(screen.getByText('Select Source')).toBeInTheDocument(); + }); + + test('handles source selection correctly', async () => { + render(); + + // Select ClickHouse as source + const clickhouseOption = screen.getByText('ClickHouse Database'); + fireEvent.click(clickhouseOption); + + // Should move to connection form + expect(screen.getByText('Configure Connection')).toBeInTheDocument(); + }); + + test('handles connection configuration correctly', async () => { + ingestionService.getRecordCount.mockResolvedValue(1000); + + render(); + + // Select source and fill connection form + const clickhouseOption = screen.getByText('ClickHouse Database'); + fireEvent.click(clickhouseOption); + + // Fill connection form + fireEvent.change(screen.getByLabelText('Host'), { target: { value: 'localhost' } }); + fireEvent.change(screen.getByLabelText('Port'), { target: { value: '8123' } }); + fireEvent.change(screen.getByLabelText('Database'), { 
target: { value: 'test_db' } }); + fireEvent.change(screen.getByLabelText('User'), { target: { value: 'test_user' } }); + fireEvent.change(screen.getByLabelText('Password'), { target: { value: 'test_pass' } }); + + // Submit form + const connectButton = screen.getByText('Connect'); + fireEvent.click(connectButton); + + // Should move to schema selection + await waitFor(() => { + expect(screen.getByText('Select Schema')).toBeInTheDocument(); + }); + }); + + test('handles schema selection correctly', async () => { + ingestionService.getSchema.mockResolvedValue(mockTableData); + + render(); + + // Complete previous steps + const clickhouseOption = screen.getByText('ClickHouse Database'); + fireEvent.click(clickhouseOption); + + // Fill and submit connection form + fireEvent.change(screen.getByLabelText('Host'), { target: { value: 'localhost' } }); + fireEvent.change(screen.getByLabelText('Port'), { target: { value: '8123' } }); + fireEvent.change(screen.getByLabelText('Database'), { target: { value: 'test_db' } }); + fireEvent.change(screen.getByLabelText('User'), { target: { value: 'test_user' } }); + fireEvent.change(screen.getByLabelText('Password'), { target: { value: 'test_pass' } }); + fireEvent.click(screen.getByText('Connect')); + + // Select table + await waitFor(() => { + const table1 = screen.getByText('table1'); + fireEvent.click(table1); + }); + + // Should move to column selection + expect(screen.getByText('Select Columns')).toBeInTheDocument(); + }); + + test('handles column selection correctly', async () => { + ingestionService.getSchema.mockResolvedValue(mockSchemaData); + + render(); + + // Complete previous steps and select columns + // ... 
(previous steps) + + // Select columns + const col1Checkbox = screen.getByLabelText('col1'); + const col2Checkbox = screen.getByLabelText('col2'); + fireEvent.click(col1Checkbox); + fireEvent.click(col2Checkbox); + + // Confirm selection + fireEvent.click(screen.getByText('Confirm Selection')); + + // Should move to data preview + expect(screen.getByText('Preview Data')).toBeInTheDocument(); + }); + + test('handles data ingestion execution correctly', async () => { + ingestionService.exportToFile.mockResolvedValue({ jobId: '123' }); + ingestionService.getProgress.mockResolvedValue({ progress: 100, status: 'COMPLETED' }); + + render(); + + // Complete previous steps and execute ingestion + // ... (previous steps) + + // Execute ingestion + const executeButton = screen.getByText('Execute'); + fireEvent.click(executeButton); + + // Should show success message + await waitFor(() => { + expect(toast.success).toHaveBeenCalledWith('Ingestion completed successfully!'); + }); + }); + + test('handles connection errors correctly', async () => { + ingestionService.getRecordCount.mockRejectedValue(new Error('Connection failed')); + + render(); + + // Complete source selection and submit invalid connection + const clickhouseOption = screen.getByText('ClickHouse Database'); + fireEvent.click(clickhouseOption); + + // Submit invalid connection + fireEvent.click(screen.getByText('Connect')); + + // Should show error message + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith('Failed to get record count: Connection failed'); + }); + }); + + test('handles schema loading errors correctly', async () => { + ingestionService.getSchema.mockRejectedValue(new Error('Failed to load schema')); + + render(); + + // Complete previous steps and try to load schema + // ... 
(previous steps) + + // Should show error message + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith('Failed to get schema: Failed to load schema'); + }); + }); + + test('handles ingestion execution errors correctly', async () => { + ingestionService.exportToFile.mockRejectedValue(new Error('Ingestion failed')); + + render(); + + // Complete previous steps and execute ingestion + // ... (previous steps) + + // Execute ingestion + const executeButton = screen.getByText('Execute'); + fireEvent.click(executeButton); + + // Should show error message + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith('Failed to start ingestion: Ingestion failed'); + }); + }); +}); \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/ProgressDisplay.test.js b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/ProgressDisplay.test.js new file mode 100644 index 000000000..621cd47df --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/ProgressDisplay.test.js @@ -0,0 +1,110 @@ +import React from 'react'; +import { render, screen, fireEvent } from '@testing-library/react'; +import { vi } from 'vitest'; +import ProgressDisplay from '../ProgressDisplay'; + +describe('ProgressDisplay Component', () => { + const mockOnExecute = vi.fn(); + + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('renders initial state correctly', () => { + render( + + ); + + expect(screen.getByText('Data Ingestion Progress')).toBeInTheDocument(); + expect(screen.getByText('Ready to start ingestion')).toBeInTheDocument(); + expect(screen.getByRole('progressbar')).toHaveAttribute('aria-valuenow', '0'); + expect(screen.getByRole('button')).not.toBeDisabled(); + }); + + test('renders processing state correctly', () => { + render( + + ); + + expect(screen.getByText('Data Ingestion Progress')).toBeInTheDocument(); + expect(screen.getByText('Processing: 45%')).toBeInTheDocument(); + 
expect(screen.getByRole('progressbar')).toHaveAttribute('aria-valuenow', '45'); + expect(screen.getByRole('button')).toBeDisabled(); + }); + + test('renders completed state correctly', () => { + render( + + ); + + expect(screen.getByText('Data Ingestion Progress')).toBeInTheDocument(); + expect(screen.getByText('Processing complete')).toBeInTheDocument(); + expect(screen.getByRole('progressbar')).toHaveAttribute('aria-valuenow', '100'); + expect(screen.getByRole('button')).not.toBeDisabled(); + }); + + test('handles execute button click', () => { + render( + + ); + + const executeButton = screen.getByRole('button'); + fireEvent.click(executeButton); + + expect(mockOnExecute).toHaveBeenCalledTimes(1); + }); + + test('disables execute button during processing', () => { + render( + + ); + + const executeButton = screen.getByRole('button'); + fireEvent.click(executeButton); + + expect(mockOnExecute).not.toHaveBeenCalled(); + }); + + test('updates progress display correctly', () => { + const { rerender } = render( + + ); + + expect(screen.getByRole('progressbar')).toHaveAttribute('aria-valuenow', '0'); + + rerender( + + ); + + expect(screen.getByRole('progressbar')).toHaveAttribute('aria-valuenow', '75'); + expect(screen.getByText('Processing: 75%')).toBeInTheDocument(); + }); +}); \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/SchemaSelection.test.js b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/SchemaSelection.test.js new file mode 100644 index 000000000..5f07ca4d0 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/components/__tests__/SchemaSelection.test.js @@ -0,0 +1,178 @@ +import React from 'react'; +import { render, screen, fireEvent, waitFor } from '@testing-library/react'; +import { vi } from 'vitest'; +import SchemaSelection from '../SchemaSelection'; +import { toast } from 'react-toastify'; +import axios from 'axios'; + +vi.mock('axios'); + +describe('SchemaSelection 
Component', () => { + const mockOnTableSelect = vi.fn(); + const mockConnectionConfig = { + host: 'localhost', + port: 8123, + database: 'test_db', + user: 'test_user', + password: 'test_pass' + }; + + const mockTables = [ + { name: 'users', rowCount: 1000 }, + { name: 'orders', rowCount: 5000 }, + { name: 'products', rowCount: 200 } + ]; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + test('renders loading state initially', () => { + render( + + ); + + expect(screen.getByText('Loading tables...')).toBeInTheDocument(); + }); + + test('renders tables from ClickHouse correctly', async () => { + axios.get.mockResolvedValueOnce({ data: mockTables }); + + render( + + ); + + await waitFor(() => { + mockTables.forEach(table => { + expect(screen.getByText(table.name)).toBeInTheDocument(); + expect(screen.getByText(`${table.rowCount.toLocaleString()} rows`)).toBeInTheDocument(); + }); + }); + }); + + test('renders tables from flat file correctly', async () => { + const mockFileTables = [ + { name: 'data.csv', rowCount: 100 } + ]; + axios.get.mockResolvedValueOnce({ data: mockFileTables }); + + render( + + ); + + await waitFor(() => { + expect(screen.getByText('data.csv')).toBeInTheDocument(); + expect(screen.getByText('100 rows')).toBeInTheDocument(); + }); + }); + + test('handles table selection correctly', async () => { + axios.get.mockResolvedValueOnce({ data: mockTables }); + + render( + + ); + + await waitFor(() => { + const tableButton = screen.getByText('users'); + fireEvent.click(tableButton); + }); + + expect(mockOnTableSelect).toHaveBeenCalledWith('users'); + }); + + test('handles API error for ClickHouse tables', async () => { + const errorMessage = 'Failed to fetch tables'; + axios.get.mockRejectedValueOnce(new Error(errorMessage)); + + render( + + ); + + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith(`Error fetching tables: ${errorMessage}`); + }); + expect(screen.getByText('No tables available')).toBeInTheDocument(); + }); + + 
test('handles API error for flat file tables', async () => { + const errorMessage = 'Failed to fetch file tables'; + axios.get.mockRejectedValueOnce(new Error(errorMessage)); + + render( + + ); + + await waitFor(() => { + expect(toast.error).toHaveBeenCalledWith(`Error fetching tables: ${errorMessage}`); + }); + expect(screen.getByText('No tables available')).toBeInTheDocument(); + }); + + test('displays empty state when no tables are available', async () => { + axios.get.mockResolvedValueOnce({ data: [] }); + + render( + + ); + + await waitFor(() => { + expect(screen.getByText('No tables available')).toBeInTheDocument(); + }); + }); + + test('refreshes tables when connection config changes', async () => { + const { rerender } = render( + + ); + + await waitFor(() => { + expect(axios.get).toHaveBeenCalledTimes(1); + }); + + const newConfig = { ...mockConnectionConfig, database: 'new_db' }; + rerender( + + ); + + await waitFor(() => { + expect(axios.get).toHaveBeenCalledTimes(2); + }); + }); +}); \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/index.css b/clickhouse-flatfile-ingestion/frontend/src/index.css new file mode 100644 index 000000000..e44138b1a --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/index.css @@ -0,0 +1,111 @@ +body { + margin: 0; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', + 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', + sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + background-color: #f5f5f5; +} + +code { + font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', + monospace; +} + +.container { + max-width: 1200px; + margin: 0 auto; + padding: 20px; +} + +.card { + background-color: white; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + padding: 20px; + margin-bottom: 20px; +} + +.form-group { + margin-bottom: 15px; +} + +.form-control { + width: 100%; + 
padding: 10px; + border: 1px solid #ddd; + border-radius: 4px; + font-size: 16px; +} + +.btn { + padding: 10px 15px; + border: none; + border-radius: 4px; + cursor: pointer; + font-size: 16px; + font-weight: 500; +} + +.btn-primary { + background-color: #1976d2; + color: white; +} + +.btn-secondary { + background-color: #6c757d; + color: white; +} + +.btn-success { + background-color: #28a745; + color: white; +} + +.btn-danger { + background-color: #dc3545; + color: white; +} + +.progress-bar { + height: 10px; + background-color: #e9ecef; + border-radius: 5px; + overflow: hidden; + margin: 10px 0; +} + +.progress-bar-fill { + height: 100%; + background-color: #1976d2; + transition: width 0.3s ease; +} + +.status-badge { + display: inline-block; + padding: 5px 10px; + border-radius: 20px; + font-size: 12px; + font-weight: 500; +} + +.status-success { + background-color: #d4edda; + color: #155724; +} + +.status-error { + background-color: #f8d7da; + color: #721c24; +} + +.status-pending { + background-color: #fff3cd; + color: #856404; +} + +.status-processing { + background-color: #cce5ff; + color: #004085; +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/index.js b/clickhouse-flatfile-ingestion/frontend/src/index.js new file mode 100644 index 000000000..7175c6f10 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/index.js @@ -0,0 +1,17 @@ +import React from 'react'; +import ReactDOM from 'react-dom/client'; +import { BrowserRouter } from 'react-router-dom'; +import App from './App'; +import './index.css'; +import { ToastContainer } from 'react-toastify'; +import 'react-toastify/dist/ReactToastify.css'; + +const root = ReactDOM.createRoot(document.getElementById('root')); +root.render( + + + + + + +); \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/pages/ConnectionSetup.js b/clickhouse-flatfile-ingestion/frontend/src/pages/ConnectionSetup.js new file mode 100644 index 
000000000..b96bc815f --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/pages/ConnectionSetup.js @@ -0,0 +1,174 @@ +import React, { useState } from 'react'; +import { + Typography, + Box, + TextField, + Button, + Card, + CardContent, + Grid, + Alert, + Snackbar +} from '@mui/material'; +import { useNavigate } from 'react-router-dom'; +import axios from 'axios'; + +const ConnectionSetup = () => { + const navigate = useNavigate(); + const [formData, setFormData] = useState({ + host: '', + port: '8123', + database: '', + username: '', + password: '', + secure: false + }); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(''); + const [success, setSuccess] = useState(false); + const [openSnackbar, setOpenSnackbar] = useState(false); + + const handleChange = (e) => { + const { name, value, type, checked } = e.target; + setFormData({ + ...formData, + [name]: type === 'checkbox' ? checked : value + }); + }; + + const handleSubmit = async (e) => { + e.preventDefault(); + setLoading(true); + setError(''); + + try { + // In a real application, this would be an API call to your backend + // const response = await axios.post('/api/connections', formData); + + // Simulating API call for demonstration + await new Promise(resolve => setTimeout(resolve, 1000)); + + setSuccess(true); + setOpenSnackbar(true); + + // Navigate to table mapping after successful connection + setTimeout(() => { + navigate('/table-mapping'); + }, 1500); + } catch (err) { + setError(err.response?.data?.message || 'Failed to establish connection'); + } finally { + setLoading(false); + } + }; + + const handleCloseSnackbar = () => { + setOpenSnackbar(false); + }; + + return ( + + + ClickHouse Connection Setup + + + Configure your connection to the ClickHouse database + + + + +
+ + + + + + + + + + + + + + + + + + + + +
+ + {error && ( + + {error} + + )} +
+
+ + + + Connection established successfully! + + +
+ ); +}; + +export default ConnectionSetup; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/pages/Dashboard.js b/clickhouse-flatfile-ingestion/frontend/src/pages/Dashboard.js new file mode 100644 index 000000000..d4180c02c --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/pages/Dashboard.js @@ -0,0 +1,136 @@ +import React from 'react'; +import { + Typography, + Grid, + Card, + CardContent, + CardActions, + Button, + Box +} from '@mui/material'; +import { Link as RouterLink } from 'react-router-dom'; +import StorageIcon from '@mui/icons-material/Storage'; +import TableChartIcon from '@mui/icons-material/TableChart'; +import CloudUploadIcon from '@mui/icons-material/CloudUpload'; +import AssessmentIcon from '@mui/icons-material/Assessment'; + +const Dashboard = () => { + return ( + + + ClickHouse Flat File Ingestion + + + A tool for ingesting flat files into ClickHouse databases with mapping capabilities + + + + + + + + + + + Connection Setup + + + Configure your ClickHouse database connection settings + + + + + + + + + + + + + + + + Table Mapping + + + Map your flat file columns to ClickHouse table columns + + + + + + + + + + + + + + + + Data Ingestion + + + Upload and process your flat files for ingestion + + + + + + + + + + + + + + + + Ingestion Status + + + Monitor the status of your data ingestion jobs + + + + + + + + + + ); +}; + +export default Dashboard; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/pages/DataIngestion.js b/clickhouse-flatfile-ingestion/frontend/src/pages/DataIngestion.js new file mode 100644 index 000000000..351563940 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/pages/DataIngestion.js @@ -0,0 +1,560 @@ +import React, { useState, useEffect } from 'react'; +import { + Typography, + Box, + Button, + Card, + CardContent, + Grid, + TextField, + FormControl, + InputLabel, + Select, + MenuItem, + Alert, + Snackbar, + Paper, + Table, + TableBody, 
+ TableCell, + TableContainer, + TableHead, + TableRow, + CircularProgress, + LinearProgress, + Divider, + Chip +} from '@mui/material'; +import { useNavigate } from 'react-router-dom'; +import PlayArrowIcon from '@mui/icons-material/PlayArrow'; +import StopIcon from '@mui/icons-material/Stop'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import axios from 'axios'; + +const DataIngestion = () => { + const navigate = useNavigate(); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(''); + const [success, setSuccess] = useState(false); + const [openSnackbar, setOpenSnackbar] = useState(false); + const [snackbarMessage, setSnackbarMessage] = useState(''); + + // Form state + const [formData, setFormData] = useState({ + batchSize: 1000, + maxRetries: 3, + timeout: 30000, + validateData: true + }); + + // Ingestion state + const [ingestionStatus, setIngestionStatus] = useState({ + status: 'idle', // idle, running, completed, failed, paused + progress: 0, + recordsProcessed: 0, + totalRecords: 0, + startTime: null, + endTime: null, + errorMessage: null + }); + + // Available mappings + const [availableMappings, setAvailableMappings] = useState([]); + const [selectedMapping, setSelectedMapping] = useState(''); + + // Load available mappings when component mounts + useEffect(() => { + // In a real app, this would be an API call + // const fetchMappings = async () => { + // try { + // const response = await axios.get('/api/mappings'); + // setAvailableMappings(response.data); + // } catch (err) { + // setError('Failed to load available mappings'); + // } + // }; + // fetchMappings(); + + // Sample data + setAvailableMappings([ + { id: '1', name: 'Users Mapping', tableName: 'users' }, + { id: '2', name: 'Orders Mapping', tableName: 'orders' }, + { id: '3', name: 'Products Mapping', tableName: 'products' } + ]); + }, []); + + // Poll for ingestion status if ingestion is running + useEffect(() => { + let intervalId; + + if 
(ingestionStatus.status === 'running') { + intervalId = setInterval(() => { + // In a real app, this would be an API call + // const fetchStatus = async () => { + // try { + // const response = await axios.get(`/api/ingestion/status/${ingestionStatus.jobId}`); + // setIngestionStatus(response.data); + // + // if (response.data.status === 'completed' || response.data.status === 'failed') { + // clearInterval(intervalId); + // } + // } catch (err) { + // setError('Failed to fetch ingestion status'); + // clearInterval(intervalId); + // } + // }; + // fetchStatus(); + + // Sample data - simulate progress + setIngestionStatus(prevStatus => { + const newProgress = Math.min(prevStatus.progress + 5, 100); + const newRecordsProcessed = Math.floor((newProgress / 100) * prevStatus.totalRecords); + + if (newProgress === 100) { + return { + ...prevStatus, + status: 'completed', + progress: newProgress, + recordsProcessed: newRecordsProcessed, + endTime: new Date().toISOString() + }; + } + + return { + ...prevStatus, + progress: newProgress, + recordsProcessed: newRecordsProcessed + }; + }); + }, 2000); + } + + return () => { + if (intervalId) { + clearInterval(intervalId); + } + }; + }, [ingestionStatus.status]); + + const handleFormChange = (e) => { + const { name, value, type, checked } = e.target; + setFormData({ + ...formData, + [name]: type === 'checkbox' ? 
checked : value + }); + }; + + const handleMappingChange = (event) => { + setSelectedMapping(event.target.value); + }; + + const startIngestion = async () => { + if (!selectedMapping) { + setError('Please select a mapping'); + return; + } + + setLoading(true); + setError(''); + + try { + // In a real application, this would be an API call to your backend + // const response = await axios.post('/api/ingestion/start', { + // mappingId: selectedMapping, + // batchSize: formData.batchSize, + // maxRetries: formData.maxRetries, + // timeout: formData.timeout, + // validateData: formData.validateData + // }); + + // Simulating API call for demonstration + await new Promise(resolve => setTimeout(resolve, 1000)); + + // Sample response + const jobId = 'job-' + Math.random().toString(36).substring(2, 9); + + setIngestionStatus({ + status: 'running', + progress: 0, + recordsProcessed: 0, + totalRecords: 10000, + startTime: new Date().toISOString(), + endTime: null, + errorMessage: null, + jobId + }); + + setSuccess(true); + setSnackbarMessage('Ingestion started successfully'); + setOpenSnackbar(true); + } catch (err) { + setError(err.response?.data?.message || 'Failed to start ingestion'); + } finally { + setLoading(false); + } + }; + + const stopIngestion = async () => { + setLoading(true); + + try { + // In a real application, this would be an API call to your backend + // await axios.post(`/api/ingestion/stop/${ingestionStatus.jobId}`); + + // Simulating API call for demonstration + await new Promise(resolve => setTimeout(resolve, 500)); + + setIngestionStatus(prevStatus => ({ + ...prevStatus, + status: 'paused', + endTime: new Date().toISOString() + })); + + setSnackbarMessage('Ingestion paused'); + setOpenSnackbar(true); + } catch (err) { + setError(err.response?.data?.message || 'Failed to stop ingestion'); + } finally { + setLoading(false); + } + }; + + const handleCloseSnackbar = () => { + setOpenSnackbar(false); + }; + + const getStatusColor = (status) => { + switch 
(status) { + case 'running': + return 'primary'; + case 'completed': + return 'success'; + case 'failed': + return 'error'; + case 'paused': + return 'warning'; + default: + return 'default'; + } + }; + + const formatTime = (timeString) => { + if (!timeString) return ''; + const date = new Date(timeString); + return date.toLocaleString(); + }; + + const calculateDuration = (startTime, endTime) => { + if (!startTime) return ''; + const start = new Date(startTime); + const end = endTime ? new Date(endTime) : new Date(); + const diffMs = end - start; + const diffSec = Math.floor(diffMs / 1000); + const diffMin = Math.floor(diffSec / 60); + const diffHour = Math.floor(diffMin / 60); + + if (diffHour > 0) { + return `${diffHour}h ${diffMin % 60}m ${diffSec % 60}s`; + } else if (diffMin > 0) { + return `${diffMin}m ${diffSec % 60}s`; + } else { + return `${diffSec}s`; + } + }; + + return ( + + + Data Ingestion + + + Start and monitor data ingestion into ClickHouse + + + + + + + + Ingestion Configuration + + + + Select Mapping + + + + + + + + + + + + + + + + Validate Data + + + + + + + + + {ingestionStatus.status === 'running' && ( + + )} + + + + + + + + + + Ingestion Status + + + + + Status: + + + + + {ingestionStatus.status !== 'idle' && ( + <> + + + Progress: {ingestionStatus.progress}% + + + + + + + + Records Processed: + + + {ingestionStatus.recordsProcessed.toLocaleString()} / {ingestionStatus.totalRecords.toLocaleString()} + + + + + Duration: + + + {calculateDuration(ingestionStatus.startTime, ingestionStatus.endTime)} + + + + + + + + + + Start Time: + + + {formatTime(ingestionStatus.startTime)} + + + {ingestionStatus.endTime && ( + + + End Time: + + + {formatTime(ingestionStatus.endTime)} + + + )} + + + {ingestionStatus.errorMessage && ( + + {ingestionStatus.errorMessage} + + )} + + )} + + {ingestionStatus.status === 'idle' && ( + + + No ingestion in progress. Select a mapping and click "Start Ingestion" to begin. 
+ + + )} + + + + + + + + + + Recent Ingestion Jobs + + + + + + + + + Job ID + Mapping + Status + Records + Start Time + Duration + + + + {/* Sample data - in a real app, this would come from an API call */} + + job-abc123 + Users Mapping + + + + 10,000 / 10,000 + 2023-04-15 10:30:45 + 2m 15s + + + job-def456 + Orders Mapping + + + + 5,000 / 20,000 + 2023-04-14 15:20:10 + 1m 30s + + + job-ghi789 + Products Mapping + + + + 7,500 / 15,000 + 2023-04-13 09:15:30 + 3m 45s + + +
+
+
+
+
+
+ + {error && ( + + {error} + + )} + + + + {snackbarMessage} + + +
+ ); +}; + +export default DataIngestion; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/pages/FileUploadPage.js b/clickhouse-flatfile-ingestion/frontend/src/pages/FileUploadPage.js new file mode 100644 index 000000000..7c89d6624 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/pages/FileUploadPage.js @@ -0,0 +1,95 @@ +import React, { useState } from 'react'; +import { useNavigate } from 'react-router-dom'; +import { + Box, + Typography, + Card, + CardContent, + Alert, + Stepper, + Step, + StepLabel, + Button +} from '@mui/material'; +import FileUpload from '../components/FileUpload'; + +const FileUploadPage = () => { + const navigate = useNavigate(); + const [activeStep, setActiveStep] = useState(0); + const [uploadedFiles, setUploadedFiles] = useState([]); + const [error, setError] = useState(''); + + const steps = [ + 'Upload Files', + 'Configure Mapping', + 'Start Ingestion' + ]; + + const handleFileUploaded = (file) => { + setUploadedFiles(prev => [...prev, file]); + setError(''); + }; + + const handleNext = () => { + if (uploadedFiles.length === 0) { + setError('Please upload at least one file before proceeding'); + return; + } + + if (activeStep === 0) { + // Navigate to table mapping page with uploaded files info + navigate('/mapping', { + state: { + files: uploadedFiles.map(f => ({ + name: f.name, + size: f.size, + type: f.type + })) + } + }); + } + }; + + return ( + + + Upload Files + + + Upload your CSV, TSV, or TXT files for ingestion into ClickHouse + + + + + + {steps.map((label) => ( + + {label} + + ))} + + + {error && ( + + {error} + + )} + + + + + + + + + + ); +}; + +export default FileUploadPage; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/pages/IngestionStatus.js b/clickhouse-flatfile-ingestion/frontend/src/pages/IngestionStatus.js new file mode 100644 index 000000000..6a9435dfd --- /dev/null +++ 
b/clickhouse-flatfile-ingestion/frontend/src/pages/IngestionStatus.js @@ -0,0 +1,463 @@ +import React, { useState, useEffect } from 'react'; +import { + Typography, + Box, + Card, + CardContent, + Grid, + Paper, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + TablePagination, + Chip, + TextField, + InputAdornment, + IconButton, + Button, + Dialog, + DialogTitle, + DialogContent, + DialogActions, + Alert, + CircularProgress, + Divider +} from '@mui/material'; +import SearchIcon from '@mui/icons-material/Search'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import VisibilityIcon from '@mui/icons-material/Visibility'; +import DownloadIcon from '@mui/icons-material/Download'; +import axios from 'axios'; + +const IngestionStatus = () => { + const [loading, setLoading] = useState(false); + const [error, setError] = useState(''); + const [jobs, setJobs] = useState([]); + const [filteredJobs, setFilteredJobs] = useState([]); + const [searchTerm, setSearchTerm] = useState(''); + const [page, setPage] = useState(0); + const [rowsPerPage, setRowsPerPage] = useState(10); + const [selectedJob, setSelectedJob] = useState(null); + const [openDialog, setOpenDialog] = useState(false); + + // Load jobs when component mounts + useEffect(() => { + fetchJobs(); + }, []); + + // Filter jobs when search term changes + useEffect(() => { + if (searchTerm) { + const filtered = jobs.filter(job => + job.jobId.toLowerCase().includes(searchTerm.toLowerCase()) || + job.mappingName.toLowerCase().includes(searchTerm.toLowerCase()) || + job.status.toLowerCase().includes(searchTerm.toLowerCase()) + ); + setFilteredJobs(filtered); + } else { + setFilteredJobs(jobs); + } + setPage(0); + }, [searchTerm, jobs]); + + const fetchJobs = async () => { + setLoading(true); + setError(''); + + try { + // In a real app, this would be an API call + // const response = await axios.get('/api/ingestion/jobs'); + // setJobs(response.data); + + // Sample data + await new 
Promise(resolve => setTimeout(resolve, 1000)); + setJobs([ + { + jobId: 'job-abc123', + mappingName: 'Users Mapping', + tableName: 'users', + status: 'completed', + recordsProcessed: 10000, + totalRecords: 10000, + startTime: '2023-04-15T10:30:45Z', + endTime: '2023-04-15T10:33:00Z', + errorMessage: null, + batchSize: 1000, + maxRetries: 3, + validateData: true + }, + { + jobId: 'job-def456', + mappingName: 'Orders Mapping', + tableName: 'orders', + status: 'failed', + recordsProcessed: 5000, + totalRecords: 20000, + startTime: '2023-04-14T15:20:10Z', + endTime: '2023-04-14T15:21:40Z', + errorMessage: 'Connection timeout after 3 retries', + batchSize: 500, + maxRetries: 3, + validateData: true + }, + { + jobId: 'job-ghi789', + mappingName: 'Products Mapping', + tableName: 'products', + status: 'running', + recordsProcessed: 7500, + totalRecords: 15000, + startTime: '2023-04-13T09:15:30Z', + endTime: null, + errorMessage: null, + batchSize: 1000, + maxRetries: 3, + validateData: true + } + ]); + } catch (err) { + setError('Failed to load ingestion jobs'); + } finally { + setLoading(false); + } + }; + + const handleSearchChange = (event) => { + setSearchTerm(event.target.value); + }; + + const handleChangePage = (event, newPage) => { + setPage(newPage); + }; + + const handleChangeRowsPerPage = (event) => { + setRowsPerPage(parseInt(event.target.value, 10)); + setPage(0); + }; + + const handleViewDetails = (job) => { + setSelectedJob(job); + setOpenDialog(true); + }; + + const handleCloseDialog = () => { + setOpenDialog(false); + setSelectedJob(null); + }; + + const handleDownloadLogs = async (jobId) => { + try { + // In a real app, this would be an API call + // const response = await axios.get(`/api/ingestion/jobs/${jobId}/logs`, { + // responseType: 'blob' + // }); + // const url = window.URL.createObjectURL(new Blob([response.data])); + // const link = document.createElement('a'); + // link.href = url; + // link.setAttribute('download', 
`ingestion-${jobId}-logs.txt`); + // document.body.appendChild(link); + // link.click(); + // link.remove(); + + // Simulating API call for demonstration + await new Promise(resolve => setTimeout(resolve, 500)); + alert('Logs downloaded successfully'); + } catch (err) { + setError('Failed to download logs'); + } + }; + + const getStatusColor = (status) => { + switch (status) { + case 'running': + return 'primary'; + case 'completed': + return 'success'; + case 'failed': + return 'error'; + case 'paused': + return 'warning'; + default: + return 'default'; + } + }; + + const formatTime = (timeString) => { + if (!timeString) return ''; + const date = new Date(timeString); + return date.toLocaleString(); + }; + + const calculateDuration = (startTime, endTime) => { + if (!startTime) return ''; + const start = new Date(startTime); + const end = endTime ? new Date(endTime) : new Date(); + const diffMs = end - start; + const diffSec = Math.floor(diffMs / 1000); + const diffMin = Math.floor(diffSec / 60); + const diffHour = Math.floor(diffMin / 60); + + if (diffHour > 0) { + return `${diffHour}h ${diffMin % 60}m ${diffSec % 60}s`; + } else if (diffMin > 0) { + return `${diffMin}m ${diffSec % 60}s`; + } else { + return `${diffSec}s`; + } + }; + + return ( + + + Ingestion Status + + + View and monitor data ingestion jobs + + + + + + + + + + ), + }} + /> + + + + + + + + + {error && ( + + {error} + + )} + + + + {loading ? ( + + + + ) : ( + <> + + + + + Job ID + Mapping + Status + Records + Start Time + Duration + Actions + + + + {filteredJobs + .slice(page * rowsPerPage, page * rowsPerPage + rowsPerPage) + .map((job) => ( + + {job.jobId} + {job.mappingName} + + + + + {job.recordsProcessed.toLocaleString()} / {job.totalRecords.toLocaleString()} + + {formatTime(job.startTime)} + {calculateDuration(job.startTime, job.endTime)} + + handleViewDetails(job)} + title="View Details" + > + + + handleDownloadLogs(job.jobId)} + title="Download Logs" + > + + + + + ))} + +
+
+ + + + )} +
+
+ + + {selectedJob && ( + <> + + Job Details: {selectedJob.jobId} + + + + + + Mapping Name + + + {selectedJob.mappingName} + + + + + Table Name + + + {selectedJob.tableName} + + + + + Status + + + + + + Records Processed + + + {selectedJob.recordsProcessed.toLocaleString()} / {selectedJob.totalRecords.toLocaleString()} + + + + + + + + Start Time + + + {formatTime(selectedJob.startTime)} + + + + + End Time + + + {selectedJob.endTime ? formatTime(selectedJob.endTime) : 'In Progress'} + + + + + Duration + + + {calculateDuration(selectedJob.startTime, selectedJob.endTime)} + + + + + Batch Size + + + {selectedJob.batchSize.toLocaleString()} + + + + + Max Retries + + + {selectedJob.maxRetries} + + + + + Data Validation + + + {selectedJob.validateData ? 'Enabled' : 'Disabled'} + + + {selectedJob.errorMessage && ( + + + + Error Message + + + {selectedJob.errorMessage} + + + )} + + + + + + + + )} + +
+ ); +}; + +export default IngestionStatus; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/pages/TableMapping.js b/clickhouse-flatfile-ingestion/frontend/src/pages/TableMapping.js new file mode 100644 index 000000000..fa6816e33 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/pages/TableMapping.js @@ -0,0 +1,358 @@ +import React, { useState, useEffect } from 'react'; +import { + Typography, + Box, + Button, + Card, + CardContent, + Grid, + TextField, + FormControl, + InputLabel, + Select, + MenuItem, + Alert, + Snackbar, + Paper, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + IconButton, + Tooltip, + Divider +} from '@mui/material'; +import { useNavigate } from 'react-router-dom'; +import DeleteIcon from '@mui/icons-material/Delete'; +import AddIcon from '@mui/icons-material/Add'; +import SaveIcon from '@mui/icons-material/Save'; +import axios from 'axios'; + +const TableMapping = () => { + const navigate = useNavigate(); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(''); + const [success, setSuccess] = useState(false); + const [openSnackbar, setOpenSnackbar] = useState(false); + + // Sample data - in a real app, this would come from API calls + const [availableTables, setAvailableTables] = useState([ + 'users', 'orders', 'products', 'customers' + ]); + const [selectedTable, setSelectedTable] = useState(''); + const [tableColumns, setTableColumns] = useState([]); + const [fileColumns, setFileColumns] = useState([]); + const [mappings, setMappings] = useState([]); + + // Form state + const [formData, setFormData] = useState({ + tableName: '', + fileColumns: '', + delimiter: ',', + hasHeader: true + }); + + // Load available tables when component mounts + useEffect(() => { + // In a real app, this would be an API call + // const fetchTables = async () => { + // try { + // const response = await axios.get('/api/tables'); + // 
setAvailableTables(response.data); + // } catch (err) { + // setError('Failed to load available tables'); + // } + // }; + // fetchTables(); + }, []); + + // Load table columns when a table is selected + useEffect(() => { + if (selectedTable) { + // In a real app, this would be an API call + // const fetchTableColumns = async () => { + // try { + // const response = await axios.get(`/api/tables/${selectedTable}/columns`); + // setTableColumns(response.data); + // } catch (err) { + // setError('Failed to load table columns'); + // } + // }; + // fetchTableColumns(); + + // Sample data + setTableColumns([ + { name: 'id', type: 'UInt32' }, + { name: 'name', type: 'String' }, + { name: 'email', type: 'String' }, + { name: 'created_at', type: 'DateTime' } + ]); + } + }, [selectedTable]); + + const handleTableChange = (event) => { + setSelectedTable(event.target.value); + }; + + const handleFormChange = (e) => { + const { name, value, type, checked } = e.target; + setFormData({ + ...formData, + [name]: type === 'checkbox' ? checked : value + }); + }; + + const handleFileUpload = (event) => { + const file = event.target.files[0]; + if (file) { + // In a real app, you might want to parse the file to get columns + // For now, we'll use sample data + setFileColumns(['id', 'full_name', 'email_address', 'registration_date']); + + // Create initial mappings + const initialMappings = fileColumns.map((fileCol, index) => { + const tableCol = tableColumns[index] ? 
tableColumns[index].name : ''; + return { + fileColumn: fileCol, + tableColumn: tableCol, + transformation: '' + }; + }); + setMappings(initialMappings); + } + }; + + const handleMappingChange = (index, field, value) => { + const updatedMappings = [...mappings]; + updatedMappings[index] = { + ...updatedMappings[index], + [field]: value + }; + setMappings(updatedMappings); + }; + + const addMapping = () => { + setMappings([ + ...mappings, + { fileColumn: '', tableColumn: '', transformation: '' } + ]); + }; + + const removeMapping = (index) => { + const updatedMappings = [...mappings]; + updatedMappings.splice(index, 1); + setMappings(updatedMappings); + }; + + const handleSubmit = async (e) => { + e.preventDefault(); + setLoading(true); + setError(''); + + try { + // In a real application, this would be an API call to your backend + // const response = await axios.post('/api/table-mappings', { + // tableName: selectedTable, + // mappings: mappings + // }); + + // Simulating API call for demonstration + await new Promise(resolve => setTimeout(resolve, 1000)); + + setSuccess(true); + setOpenSnackbar(true); + + // Navigate to data ingestion after successful mapping + setTimeout(() => { + navigate('/data-ingestion'); + }, 1500); + } catch (err) { + setError(err.response?.data?.message || 'Failed to save table mapping'); + } finally { + setLoading(false); + } + }; + + const handleCloseSnackbar = () => { + setOpenSnackbar(false); + }; + + return ( + + + Table Mapping + + + Map your flat file columns to ClickHouse table columns + + + + +
+ + + + Select ClickHouse Table + + + + + + + + + {selectedTable && fileColumns.length > 0 && ( + <> + + + + Column Mappings + + + Map your flat file columns to the selected table columns + + + + + + + + + File Column + Table Column + Transformation (Optional) + + + + + {mappings.map((mapping, index) => ( + + + + + + + + + + + + + handleMappingChange(index, 'transformation', e.target.value)} + /> + + + + removeMapping(index)} + disabled={mappings.length <= 1} + > + + + + + + ))} + +
+
+
+ + + + + + + + + + )} +
+
+ + {error && ( + + {error} + + )} +
+
+ + + + Table mapping saved successfully! + + +
+ ); +}; + +export default TableMapping; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/services/api.js b/clickhouse-flatfile-ingestion/frontend/src/services/api.js new file mode 100644 index 000000000..bf7082a6a --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/services/api.js @@ -0,0 +1,125 @@ +import axios from 'axios'; + +const API_BASE_URL = process.env.REACT_APP_API_BASE_URL || 'http://localhost:8080/api'; + +const api = axios.create({ + baseURL: API_BASE_URL, + headers: { + 'Content-Type': 'application/json' + } +}); + +// File Upload Service +export const fileService = { + upload: async (file, onProgress) => { + const formData = new FormData(); + formData.append('file', file); + + return api.post('/files/upload', formData, { + headers: { + 'Content-Type': 'multipart/form-data' + }, + onUploadProgress: (progressEvent) => { + const progress = Math.round((progressEvent.loaded * 100) / progressEvent.total); + if (onProgress) { + onProgress(progress); + } + } + }); + }, + + getPreview: async (fileId) => { + return api.get(`/files/${fileId}/preview`); + }, + + delete: async (fileId) => { + return api.delete(`/files/${fileId}`); + } +}; + +// Table Mapping Service +export const mappingService = { + getAvailableTables: async () => { + return api.get('/tables'); + }, + + getTableColumns: async (tableName) => { + return api.get(`/tables/${tableName}/columns`); + }, + + saveMapping: async (mapping) => { + return api.post('/mappings', mapping); + }, + + getMappings: async () => { + return api.get('/mappings'); + }, + + getMapping: async (mappingId) => { + return api.get(`/mappings/${mappingId}`); + }, + + deleteMapping: async (mappingId) => { + return api.delete(`/mappings/${mappingId}`); + } +}; + +// Ingestion Service +export const ingestionService = { + startIngestion: async (config) => { + return api.post('/ingestion/start', config); + }, + + stopIngestion: async (jobId) => { + return 
api.post(`/ingestion/${jobId}/stop`); + }, + + getStatus: async (jobId) => { + return api.get(`/ingestion/${jobId}/status`); + }, + + getJobs: async (params) => { + return api.get('/ingestion/jobs', { params }); + }, + + getJobDetails: async (jobId) => { + return api.get(`/ingestion/jobs/${jobId}`); + }, + + getLogs: async (jobId) => { + return api.get(`/ingestion/jobs/${jobId}/logs`, { + responseType: 'blob' + }); + } +}; + +// Connection Service +export const connectionService = { + testConnection: async (config) => { + return api.post('/connection/test', config); + }, + + saveConnection: async (config) => { + return api.post('/connection', config); + }, + + getConnection: async () => { + return api.get('/connection'); + }, + + updateConnection: async (config) => { + return api.put('/connection', config); + } +}; + +// Error Interceptor +api.interceptors.response.use( + (response) => response, + (error) => { + const message = error.response?.data?.message || 'An unexpected error occurred'; + console.error('API Error:', message); + return Promise.reject(error); + } +); + +export default api; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/services/authService.js b/clickhouse-flatfile-ingestion/frontend/src/services/authService.js new file mode 100644 index 000000000..8bfb24793 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/services/authService.js @@ -0,0 +1,63 @@ +import axios from 'axios'; + +const API_URL = process.env.REACT_APP_API_URL || 'http://localhost:8080/api'; + +const authService = { + login: async (username, password) => { + try { + const response = await axios.post(`${API_URL}/auth/login`, { + username, + password + }); + if (response.data.token) { + localStorage.setItem('token', response.data.token); + localStorage.setItem('user', JSON.stringify(response.data.user)); + } + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Login failed' }; + } + }, + + logout: () 
=> { + localStorage.removeItem('token'); + localStorage.removeItem('user'); + }, + + getCurrentUser: () => { + return JSON.parse(localStorage.getItem('user')); + }, + + getToken: () => { + return localStorage.getItem('token'); + }, + + // Axios interceptor to add token to requests + setupAxiosInterceptors: () => { + axios.interceptors.request.use( + (config) => { + const token = authService.getToken(); + if (token) { + config.headers.Authorization = `Bearer ${token}`; + } + return config; + }, + (error) => { + return Promise.reject(error); + } + ); + + axios.interceptors.response.use( + (response) => response, + (error) => { + if (error.response?.status === 401) { + authService.logout(); + window.location.href = '/login'; + } + return Promise.reject(error); + } + ); + } +}; + +export default authService; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/services/clickhouseService.js b/clickhouse-flatfile-ingestion/frontend/src/services/clickhouseService.js new file mode 100644 index 000000000..1298b98a3 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/services/clickhouseService.js @@ -0,0 +1,200 @@ +import axios from 'axios'; +import api from './api'; + +const API_BASE_URL = process.env.REACT_APP_API_BASE_URL || 'http://localhost:8080/api'; + +/** + * Service for interacting with ClickHouse database + */ +const clickhouseService = { + /** + * Test connection to ClickHouse database + * @param {Object} config - Connection configuration + * @returns {Promise} - Connection test result + */ + testConnection: async (config) => { + try { + const response = await axios.post(`${API_BASE_URL}/test-connection`, null, { + headers: { + 'X-ClickHouse-Host': config.host, + 'X-ClickHouse-Port': config.port, + 'X-ClickHouse-Database': config.database, + 'X-ClickHouse-Username': config.username, + 'X-ClickHouse-Password': config.password + } + }); + return response.data; + } catch (error) { + throw new 
Error(error.response?.data?.message || 'Failed to test connection'); + } + }, + + /** + * Get list of available databases + * @param {Object} config - Connection configuration + * @returns {Promise} - List of databases + */ + getDatabases: async (config) => { + try { + const response = await axios.post(`${API_BASE_URL}/clickhouse/databases`, config); + return response.data; + } catch (error) { + throw new Error('Failed to fetch databases'); + } + }, + + /** + * Get list of tables in a database + * @param {Object} config - Connection configuration + * @param {string} database - Database name + * @returns {Promise} - List of tables + */ + getTables: async (config) => { + try { + const response = await axios.get(`${API_BASE_URL}/tables`, { + headers: { + 'X-ClickHouse-Host': config.host, + 'X-ClickHouse-Port': config.port, + 'X-ClickHouse-Database': config.database, + 'X-ClickHouse-Username': config.username, + 'X-ClickHouse-Password': config.password + } + }); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to fetch tables'); + } + }, + + /** + * Get schema for a specific table + * @param {Object} config - Connection configuration + * @param {string} table - Table name + * @returns {Promise} - Table schema + */ + getTableSchema: async (config, table) => { + try { + const response = await axios.post(`${API_BASE_URL}/clickhouse/schema`, { + ...config, + table + }); + return response.data; + } catch (error) { + throw new Error('Failed to fetch table schema'); + } + }, + + /** + * Preview data from a table + * @param {Object} config - Connection configuration + * @param {string} table - Table name + * @param {number} limit - Number of rows to preview + * @returns {Promise} - Preview data + */ + previewData: async (config, table, limit = 100) => { + try { + const response = await axios.post(`${API_BASE_URL}/clickhouse/preview`, { + ...config, + table, + limit + }); + return response.data; + } catch (error) { + throw 
new Error('Failed to preview data'); + } + }, + + /** + * Execute a custom query + * @param {Object} config - Connection configuration + * @param {string} query - SQL query + * @returns {Promise} - Query results + */ + executeQuery: async (config, query) => { + return api.post('/clickhouse/execute', { ...config, query }); + }, + + /** + * Export data from ClickHouse to a file + * @param {Object} config - Connection configuration + * @param {Object} exportConfig - Export configuration + * @returns {Promise} - Export job status + */ + exportData: async (config, exportConfig) => { + return api.post('/clickhouse/export', { ...config, ...exportConfig }); + }, + + /** + * Get status of an export job + * @param {string} jobId - Export job ID + * @returns {Promise} - Job status + */ + getExportStatus: async (jobId) => { + return api.get(`/clickhouse/export/${jobId}/status`); + }, + + /** + * Cancel an export job + * @param {string} jobId - Export job ID + * @returns {Promise} - Cancellation result + */ + cancelExport: async (jobId) => { + return api.post(`/clickhouse/export/${jobId}/cancel`); + }, + + // Save table mapping + saveMapping: async (config, mapping) => { + try { + const response = await axios.post(`${API_BASE_URL}/mappings`, mapping, { + headers: { + 'X-ClickHouse-Host': config.host, + 'X-ClickHouse-Port': config.port, + 'X-ClickHouse-Database': config.database, + 'X-ClickHouse-Username': config.username, + 'X-ClickHouse-Password': config.password + } + }); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to save mapping'); + } + }, + + // Get saved mappings + getMappings: async (config) => { + try { + const response = await axios.get(`${API_BASE_URL}/mappings`, { + headers: { + 'X-ClickHouse-Host': config.host, + 'X-ClickHouse-Port': config.port, + 'X-ClickHouse-Database': config.database, + 'X-ClickHouse-Username': config.username, + 'X-ClickHouse-Password': config.password + } + }); + return 
response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to fetch mappings'); + } + }, + + // Delete mapping + deleteMapping: async (config, mappingId) => { + try { + const response = await axios.delete(`${API_BASE_URL}/mappings/${mappingId}`, { + headers: { + 'X-ClickHouse-Host': config.host, + 'X-ClickHouse-Port': config.port, + 'X-ClickHouse-Database': config.database, + 'X-ClickHouse-Username': config.username, + 'X-ClickHouse-Password': config.password + } + }); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to delete mapping'); + } + } +}; + +export default clickhouseService; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/services/fileService.js b/clickhouse-flatfile-ingestion/frontend/src/services/fileService.js new file mode 100644 index 000000000..e177a80d0 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/services/fileService.js @@ -0,0 +1,71 @@ +import axios from 'axios'; + +const API_BASE_URL = process.env.REACT_APP_API_BASE_URL || 'http://localhost:8080/api'; + +/** + * Service for handling file operations + */ +const fileService = { + /** + * Upload a file with progress tracking + * @param {FormData} formData - FormData containing the file + * @param {Function} onProgress - Callback for upload progress + * @returns {Promise} - Upload response + */ + upload: async (formData, onProgress) => { + try { + const response = await axios.post(`${API_BASE_URL}/files/upload`, formData, { + headers: { + 'Content-Type': 'multipart/form-data' + }, + onUploadProgress: onProgress + }); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to upload file'); + } + }, + + /** + * Get a preview of a file + * @param {string} fileId - ID of the file to preview + * @returns {Promise} - Preview data + */ + getPreview: async (fileId) => { + try { + const response = await 
axios.get(`${API_BASE_URL}/files/${fileId}/preview`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get file preview'); + } + }, + + /** + * Delete a file + * @param {string} fileId - ID of the file to delete + * @returns {Promise} - Deletion response + */ + delete: async (fileId) => { + try { + const response = await axios.delete(`${API_BASE_URL}/files/${fileId}`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to delete file'); + } + }, + + /** + * List all uploaded files + * @returns {Promise} - List of files + */ + list: async () => { + try { + const response = await axios.get(`${API_BASE_URL}/files`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to list files'); + } + } +}; + +export default fileService; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/services/ingestionService.js b/clickhouse-flatfile-ingestion/frontend/src/services/ingestionService.js new file mode 100644 index 000000000..c437c6276 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/services/ingestionService.js @@ -0,0 +1,247 @@ +import axios from 'axios'; + +const API_URL = process.env.REACT_APP_API_URL || 'http://localhost:8080/api'; + +const ingestionService = { + /** + * Get the current status of an ingestion job + * @param {string} mappingId - The ID of the mapping + * @returns {Promise} The status object containing state, progress, and recent jobs + */ + async getStatus(mappingId) { + try { + const response = await axios.get(`${API_URL}/ingestion/${mappingId}/status`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get ingestion status'); + } + }, + + /** + * Start a new ingestion job + * @param {string} mappingId - The ID of the mapping + * @param {Object} config - Optional configuration for the ingestion job + 
* @returns {Promise} The created job details + */ + async startIngestion(mappingId, config = {}) { + try { + const response = await axios.post(`${API_URL}/ingestion/${mappingId}/start`, config); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to start ingestion'); + } + }, + + /** + * Stop a running ingestion job + * @param {string} mappingId - The ID of the mapping + * @returns {Promise} The updated job status + */ + async stopIngestion(mappingId) { + try { + const response = await axios.post(`${API_URL}/ingestion/${mappingId}/stop`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to stop ingestion'); + } + }, + + /** + * Get the configuration for an ingestion job + * @param {string} mappingId - The ID of the mapping + * @returns {Promise} The ingestion configuration + */ + async getConfig(mappingId) { + try { + const response = await axios.get(`${API_URL}/ingestion/${mappingId}/config`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get ingestion configuration'); + } + }, + + /** + * Update the configuration for an ingestion job + * @param {string} mappingId - The ID of the mapping + * @param {Object} config - The new configuration + * @returns {Promise} The updated configuration + */ + async updateConfig(mappingId, config) { + try { + const response = await axios.put(`${API_URL}/ingestion/${mappingId}/config`, config); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to update ingestion configuration'); + } + }, + + /** + * Get the logs for an ingestion job + * @param {string} mappingId - The ID of the mapping + * @param {Object} options - Optional parameters for filtering logs + * @returns {Promise} Array of log entries + */ + async getLogs(mappingId, options = {}) { + try { + const response = await 
axios.get(`${API_URL}/ingestion/${mappingId}/logs`, { params: options }); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get ingestion logs'); + } + }, + + /** + * Get statistics for an ingestion job + * @param {string} mappingId - The ID of the mapping + * @param {Object} options - Options for filtering statistics + * @returns {Promise} The statistics object + */ + async getStats(mappingId, options = {}) { + try { + const response = await axios.get(`${API_URL}/ingestion/${mappingId}/stats`, { params: options }); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get ingestion statistics'); + } + }, + + // ClickHouse to Flat File + exportToFile: async (config) => { + try { + const response = await axios.post(`${API_URL}/ingestion/export`, config, { + responseType: 'blob' + }); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Export failed' }; + } + }, + + // Flat File to ClickHouse + importFromFile: async (config) => { + try { + const formData = new FormData(); + formData.append('file', config.file); + formData.append('config', JSON.stringify(config)); + + const response = await axios.post(`${API_URL}/ingestion/import`, formData, { + headers: { + 'Content-Type': 'multipart/form-data' + } + }); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Import failed' }; + } + }, + + // Get schema information + getSchema: async (sourceType, config) => { + try { + const response = await axios.post(`${API_URL}/ingestion/schema`, { + sourceType, + ...config + }); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Failed to fetch schema' }; + } + }, + + // Get data type mappings + getDataTypeMappings: async () => { + try { + const response = await axios.get(`${API_URL}/ingestion/mappings`); + return response.data; + } catch (error) { + throw 
error.response?.data || { message: 'Failed to fetch data type mappings' }; + } + }, + + // Get record count + getRecordCount: async (sourceType, config) => { + try { + const response = await axios.post(`${API_URL}/ingestion/count`, { + sourceType, + ...config + }); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Failed to fetch record count' }; + } + }, + + // Get ingestion progress + getProgress: async (jobId) => { + try { + const response = await axios.get(`${API_URL}/ingestion/progress/${jobId}`); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Failed to fetch progress' }; + } + }, + + // Preview data with filtering and sorting + getPreviewData: async (config) => { + try { + const response = await axios.post(`${API_URL}/ingestion/preview`, config); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Failed to fetch preview data' }; + } + }, + + // Export preview data + exportPreviewData: async (config) => { + try { + const response = await axios.post(`${API_URL}/ingestion/preview/export`, config, { + responseType: 'blob' + }); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Failed to export preview data' }; + } + }, + + // Preview join results + previewJoin: async (config) => { + try { + const response = await axios.post(`${API_URL}/ingestion/preview/join`, config); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Failed to preview join results' }; + } + }, + + // Execute join operation + executeJoin: async (config) => { + try { + const response = await axios.post(`${API_URL}/ingestion/join`, config); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Failed to execute join operation' }; + } + }, + + // Get join progress + getJoinProgress: async (jobId) => { + try { + const response = await 
axios.get(`${API_URL}/ingestion/join/progress/${jobId}`); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Failed to fetch join progress' }; + } + }, + + // Get join statistics + getJoinStats: async (jobId) => { + try { + const response = await axios.get(`${API_URL}/ingestion/join/stats/${jobId}`); + return response.data; + } catch (error) { + throw error.response?.data || { message: 'Failed to fetch join statistics' }; + } + } +}; + +export default ingestionService; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/services/mappingService.js b/clickhouse-flatfile-ingestion/frontend/src/services/mappingService.js new file mode 100644 index 000000000..3def1512d --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/services/mappingService.js @@ -0,0 +1,135 @@ +import axios from 'axios'; + +const API_BASE_URL = process.env.REACT_APP_API_BASE_URL || 'http://localhost:8080/api'; + +/** + * Service for handling table mapping operations between flat files and ClickHouse tables + */ +const mappingService = { + /** + * Get all available mappings + * @returns {Promise} List of mapping objects + */ + async getMappings() { + try { + const response = await axios.get(`${API_BASE_URL}/mappings`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get mappings'); + } + }, + + /** + * Get a specific mapping by ID + * @param {string} mappingId - The ID of the mapping + * @returns {Promise} The mapping object + */ + async getMapping(mappingId) { + try { + const response = await axios.get(`${API_BASE_URL}/mappings/${mappingId}`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get mapping'); + } + }, + + /** + * Create a new mapping + * @param {Object} mapping - The mapping configuration + * @returns {Promise} The created mapping + */ + async createMapping(mapping) { + try { + const response = 
await axios.post(`${API_BASE_URL}/mappings`, mapping); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to create mapping'); + } + }, + + /** + * Update an existing mapping + * @param {string} mappingId - The ID of the mapping to update + * @param {Object} mapping - The updated mapping configuration + * @returns {Promise} The updated mapping + */ + async updateMapping(mappingId, mapping) { + try { + const response = await axios.put(`${API_BASE_URL}/mappings/${mappingId}`, mapping); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to update mapping'); + } + }, + + /** + * Delete a mapping + * @param {string} mappingId - The ID of the mapping to delete + * @returns {Promise} The deletion response + */ + async deleteMapping(mappingId) { + try { + const response = await axios.delete(`${API_BASE_URL}/mappings/${mappingId}`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to delete mapping'); + } + }, + + /** + * Get available ClickHouse tables + * @returns {Promise} List of available tables + */ + async getAvailableTables() { + try { + const response = await axios.get(`${API_BASE_URL}/tables`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get available tables'); + } + }, + + /** + * Get schema for a specific table + * @param {string} tableName - The name of the table + * @returns {Promise} The table schema + */ + async getTableSchema(tableName) { + try { + const response = await axios.get(`${API_BASE_URL}/tables/${tableName}/schema`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get table schema'); + } + }, + + /** + * Test a mapping configuration + * @param {Object} mapping - The mapping configuration to test + * @returns {Promise} The test results + */ + async testMapping(mapping) { + 
try { + const response = await axios.post(`${API_BASE_URL}/mappings/test`, mapping); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to test mapping'); + } + }, + + /** + * Get mapping statistics + * @param {string} mappingId - The ID of the mapping + * @returns {Promise} The mapping statistics + */ + async getMappingStats(mappingId) { + try { + const response = await axios.get(`${API_BASE_URL}/mappings/${mappingId}/stats`); + return response.data; + } catch (error) { + throw new Error(error.response?.data?.message || 'Failed to get mapping statistics'); + } + } +}; + +export default mappingService; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/setupTests.js b/clickhouse-flatfile-ingestion/frontend/src/setupTests.js new file mode 100644 index 000000000..9a44580e8 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/setupTests.js @@ -0,0 +1,66 @@ +import '@testing-library/jest-dom'; +import { configure } from '@testing-library/react'; +import { vi } from 'vitest'; + +// Configure testing library +configure({ testIdAttribute: 'data-testid' }); + +// Mock window.matchMedia +Object.defineProperty(window, 'matchMedia', { + writable: true, + value: vi.fn().mockImplementation(query => ({ + matches: false, + media: query, + onchange: null, + addListener: vi.fn(), + removeListener: vi.fn(), + addEventListener: vi.fn(), + removeEventListener: vi.fn(), + dispatchEvent: vi.fn(), + })), +}); + +// Mock IntersectionObserver +const mockIntersectionObserver = vi.fn(); +mockIntersectionObserver.mockReturnValue({ + observe: () => null, + unobserve: () => null, + disconnect: () => null +}); +window.IntersectionObserver = mockIntersectionObserver; + +// Mock ResizeObserver +const mockResizeObserver = vi.fn(); +mockResizeObserver.mockReturnValue({ + observe: () => null, + unobserve: () => null, + disconnect: () => null +}); +window.ResizeObserver = mockResizeObserver; + +// Mock toast 
notifications +vi.mock('react-toastify', () => ({ + toast: { + success: vi.fn(), + error: vi.fn(), + info: vi.fn(), + warning: vi.fn() + }, + ToastContainer: vi.fn() +})); + +// Mock axios +vi.mock('axios', () => ({ + default: { + create: vi.fn(() => ({ + get: vi.fn(), + post: vi.fn(), + put: vi.fn(), + delete: vi.fn() + })), + get: vi.fn(), + post: vi.fn(), + put: vi.fn(), + delete: vi.fn() + } +})); \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/theme.js b/clickhouse-flatfile-ingestion/frontend/src/theme.js new file mode 100644 index 000000000..225f81a9d --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/theme.js @@ -0,0 +1,119 @@ +import { createTheme } from '@mui/material/styles'; + +const theme = createTheme({ + palette: { + primary: { + main: '#1976d2', + light: '#42a5f5', + dark: '#1565c0', + }, + secondary: { + main: '#9c27b0', + light: '#ba68c8', + dark: '#7b1fa2', + }, + error: { + main: '#d32f2f', + light: '#ef5350', + dark: '#c62828', + }, + warning: { + main: '#ed6c02', + light: '#ff9800', + dark: '#e65100', + }, + info: { + main: '#0288d1', + light: '#03a9f4', + dark: '#01579b', + }, + success: { + main: '#2e7d32', + light: '#4caf50', + dark: '#1b5e20', + }, + background: { + default: '#f5f5f5', + paper: '#ffffff', + }, + }, + typography: { + fontFamily: [ + '-apple-system', + 'BlinkMacSystemFont', + '"Segoe UI"', + 'Roboto', + '"Helvetica Neue"', + 'Arial', + 'sans-serif', + ].join(','), + h1: { + fontSize: '2.5rem', + fontWeight: 500, + }, + h2: { + fontSize: '2rem', + fontWeight: 500, + }, + h3: { + fontSize: '1.75rem', + fontWeight: 500, + }, + h4: { + fontSize: '1.5rem', + fontWeight: 500, + }, + h5: { + fontSize: '1.25rem', + fontWeight: 500, + }, + h6: { + fontSize: '1rem', + fontWeight: 500, + }, + }, + components: { + MuiButton: { + styleOverrides: { + root: { + textTransform: 'none', + borderRadius: 8, + }, + }, + }, + MuiCard: { + styleOverrides: { + root: { + borderRadius: 12, + boxShadow: 
'0 4px 6px rgba(0, 0, 0, 0.1)', + }, + }, + }, + MuiPaper: { + styleOverrides: { + root: { + borderRadius: 12, + }, + }, + }, + MuiTableCell: { + styleOverrides: { + root: { + padding: '12px 16px', + }, + }, + }, + MuiTableHead: { + styleOverrides: { + root: { + '& .MuiTableCell-root': { + fontWeight: 600, + backgroundColor: '#f5f5f5', + }, + }, + }, + }, + }, +}); + +export default theme; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/utils/AppContext.js b/clickhouse-flatfile-ingestion/frontend/src/utils/AppContext.js new file mode 100644 index 000000000..ea8617a38 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/utils/AppContext.js @@ -0,0 +1,183 @@ +import React, { createContext, useContext, useState, useEffect } from 'react'; +import { useSnackbar } from 'notistack'; +import clickhouseService from '../services/clickhouseService'; +import mappingService from '../services/mappingService'; + +// Create context +const AppContext = createContext(); + +// Custom hook to use the app context +export const useAppContext = () => useContext(AppContext); + +// Provider component +export const AppProvider = ({ children }) => { + const { enqueueSnackbar } = useSnackbar(); + + // State + const [connections, setConnections] = useState([]); + const [mappings, setMappings] = useState([]); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + // Load connections on mount + useEffect(() => { + loadConnections(); + }, []); + + // Load mappings on mount + useEffect(() => { + loadMappings(); + }, []); + + // Load all connections + const loadConnections = async () => { + try { + setLoading(true); + const data = await clickhouseService.getConnections(); + setConnections(data); + setError(null); + } catch (err) { + setError(err.message); + enqueueSnackbar(`Failed to load connections: ${err.message}`, { variant: 'error' }); + } finally { + setLoading(false); + } + }; + + // Load all mappings + 
const loadMappings = async () => { + try { + setLoading(true); + const data = await mappingService.getMappings(); + setMappings(data); + setError(null); + } catch (err) { + setError(err.message); + enqueueSnackbar(`Failed to load mappings: ${err.message}`, { variant: 'error' }); + } finally { + setLoading(false); + } + }; + + // Add a new connection + const addConnection = async (connection) => { + try { + setLoading(true); + const newConnection = await clickhouseService.createConnection(connection); + setConnections([...connections, newConnection]); + enqueueSnackbar('Connection added successfully', { variant: 'success' }); + return newConnection; + } catch (err) { + setError(err.message); + enqueueSnackbar(`Failed to add connection: ${err.message}`, { variant: 'error' }); + throw err; + } finally { + setLoading(false); + } + }; + + // Update an existing connection + const updateConnection = async (id, connection) => { + try { + setLoading(true); + const updatedConnection = await clickhouseService.updateConnection(id, connection); + setConnections(connections.map(conn => conn.id === id ? 
updatedConnection : conn)); + enqueueSnackbar('Connection updated successfully', { variant: 'success' }); + return updatedConnection; + } catch (err) { + setError(err.message); + enqueueSnackbar(`Failed to update connection: ${err.message}`, { variant: 'error' }); + throw err; + } finally { + setLoading(false); + } + }; + + // Delete a connection + const deleteConnection = async (id) => { + try { + setLoading(true); + await clickhouseService.deleteConnection(id); + setConnections(connections.filter(conn => conn.id !== id)); + enqueueSnackbar('Connection deleted successfully', { variant: 'success' }); + } catch (err) { + setError(err.message); + enqueueSnackbar(`Failed to delete connection: ${err.message}`, { variant: 'error' }); + throw err; + } finally { + setLoading(false); + } + }; + + // Add a new mapping + const addMapping = async (mapping) => { + try { + setLoading(true); + const newMapping = await mappingService.createMapping(mapping); + setMappings([...mappings, newMapping]); + enqueueSnackbar('Mapping added successfully', { variant: 'success' }); + return newMapping; + } catch (err) { + setError(err.message); + enqueueSnackbar(`Failed to add mapping: ${err.message}`, { variant: 'error' }); + throw err; + } finally { + setLoading(false); + } + }; + + // Update an existing mapping + const updateMapping = async (id, mapping) => { + try { + setLoading(true); + const updatedMapping = await mappingService.updateMapping(id, mapping); + setMappings(mappings.map(map => map.id === id ? 
updatedMapping : map)); + enqueueSnackbar('Mapping updated successfully', { variant: 'success' }); + return updatedMapping; + } catch (err) { + setError(err.message); + enqueueSnackbar(`Failed to update mapping: ${err.message}`, { variant: 'error' }); + throw err; + } finally { + setLoading(false); + } + }; + + // Delete a mapping + const deleteMapping = async (id) => { + try { + setLoading(true); + await mappingService.deleteMapping(id); + setMappings(mappings.filter(map => map.id !== id)); + enqueueSnackbar('Mapping deleted successfully', { variant: 'success' }); + } catch (err) { + setError(err.message); + enqueueSnackbar(`Failed to delete mapping: ${err.message}`, { variant: 'error' }); + throw err; + } finally { + setLoading(false); + } + }; + + // Context value + const value = { + connections, + mappings, + loading, + error, + loadConnections, + loadMappings, + addConnection, + updateConnection, + deleteConnection, + addMapping, + updateMapping, + deleteMapping + }; + + return ( + + {children} + + ); +}; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/utils/dataTypeMapper.js b/clickhouse-flatfile-ingestion/frontend/src/utils/dataTypeMapper.js new file mode 100644 index 000000000..44878150b --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/utils/dataTypeMapper.js @@ -0,0 +1,123 @@ +// Data type mapping between ClickHouse and flat file formats +const dataTypeMappings = { + // ClickHouse to Flat File mappings + clickhouseToFlat: { + 'UInt8': 'integer', + 'UInt16': 'integer', + 'UInt32': 'integer', + 'UInt64': 'integer', + 'Int8': 'integer', + 'Int16': 'integer', + 'Int32': 'integer', + 'Int64': 'integer', + 'Float32': 'float', + 'Float64': 'float', + 'String': 'string', + 'FixedString': 'string', + 'Date': 'date', + 'DateTime': 'datetime', + 'Enum8': 'string', + 'Enum16': 'string', + 'Array': 'array', + 'Nullable': 'nullable' + }, + + // Flat File to ClickHouse mappings + flatToClickhouse: { + 'integer': 'Int32', 
+ 'float': 'Float64', + 'string': 'String', + 'date': 'Date', + 'datetime': 'DateTime', + 'array': 'Array(String)', + 'boolean': 'UInt8' + } +}; + +// Function to convert ClickHouse type to flat file type +export const convertClickHouseToFlat = (clickhouseType) => { + // Handle Nullable types + if (clickhouseType.startsWith('Nullable(')) { + const baseType = clickhouseType.slice(9, -1); + return { + type: dataTypeMappings.clickhouseToFlat[baseType] || 'string', + nullable: true + }; + } + + // Handle Array types + if (clickhouseType.startsWith('Array(')) { + const elementType = clickhouseType.slice(6, -1); + return { + type: 'array', + elementType: dataTypeMappings.clickhouseToFlat[elementType] || 'string' + }; + } + + return { + type: dataTypeMappings.clickhouseToFlat[clickhouseType] || 'string', + nullable: false + }; +}; + +// Function to convert flat file type to ClickHouse type +export const convertFlatToClickHouse = (flatType, nullable = false) => { + let clickhouseType = dataTypeMappings.flatToClickhouse[flatType] || 'String'; + + if (nullable) { + clickhouseType = `Nullable(${clickhouseType})`; + } + + return clickhouseType; +}; + +// Function to validate data type compatibility +export const validateDataTypeCompatibility = (sourceType, targetType) => { + const sourceMapping = convertClickHouseToFlat(sourceType); + const targetMapping = convertClickHouseToFlat(targetType); + + // Basic type compatibility check + if (sourceMapping.type === targetMapping.type) { + return true; + } + + // Special cases for numeric types + if (['integer', 'float'].includes(sourceMapping.type) && + ['integer', 'float'].includes(targetMapping.type)) { + return true; + } + + // String can be converted to most types + if (sourceMapping.type === 'string') { + return true; + } + + return false; +}; + +// Function to get default value for a type +export const getDefaultValue = (type) => { + switch (type) { + case 'integer': + case 'float': + return 0; + case 'string': + return ''; + 
case 'date': + case 'datetime': + return new Date().toISOString(); + case 'boolean': + return false; + case 'array': + return []; + default: + return null; + } +}; + +export default { + convertClickHouseToFlat, + convertFlatToClickHouse, + validateDataTypeCompatibility, + getDefaultValue +}; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/utils/formatters.js b/clickhouse-flatfile-ingestion/frontend/src/utils/formatters.js new file mode 100644 index 000000000..56fdbe702 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/utils/formatters.js @@ -0,0 +1,51 @@ +// Format number with commas and optional decimal places +export const formatNumber = (number, decimals = 0) => { + if (number === null || number === undefined) return 'N/A'; + return new Intl.NumberFormat('en-US', { + minimumFractionDigits: decimals, + maximumFractionDigits: decimals + }).format(number); +}; + +// Format file size in bytes to human readable format +export const formatFileSize = (bytes) => { + if (bytes === 0) return '0 Bytes'; + const k = 1024; + const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; +}; + +// Format date to local string +export const formatDate = (date) => { + if (!date) return 'N/A'; + return new Date(date).toLocaleString(); +}; + +// Format duration in milliseconds to human readable format +export const formatDuration = (ms) => { + if (!ms) return 'N/A'; + const seconds = Math.floor(ms / 1000); + const minutes = Math.floor(seconds / 60); + const hours = Math.floor(minutes / 60); + const days = Math.floor(hours / 24); + + if (days > 0) return `${days}d ${hours % 24}h`; + if (hours > 0) return `${hours}h ${minutes % 60}m`; + if (minutes > 0) return `${minutes}m ${seconds % 60}s`; + return `${seconds}s`; +}; + +// Format percentage +export const formatPercentage = (value) => { + if (value === null || value === 
undefined) return 'N/A'; + return `${Math.round(value)}%`; +}; + +export default { + formatNumber, + formatFileSize, + formatDate, + formatDuration, + formatPercentage +}; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/utils/helpers.js b/clickhouse-flatfile-ingestion/frontend/src/utils/helpers.js new file mode 100644 index 000000000..dd602750d --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/utils/helpers.js @@ -0,0 +1,133 @@ +/** + * Utility functions for the ClickHouse Flat File Ingestion application + */ + +/** + * Format a date string to a more readable format + * @param {string} dateString - ISO date string + * @returns {string} Formatted date string + */ +export const formatDate = (dateString) => { + if (!dateString) return 'N/A'; + const date = new Date(dateString); + return date.toLocaleString(); +}; + +/** + * Format a number with commas for thousands + * @param {number} number - Number to format + * @returns {string} Formatted number string + */ +export const formatNumber = (number) => { + if (number === undefined || number === null) return 'N/A'; + return number.toLocaleString(); +}; + +/** + * Format file size in bytes to human-readable format + * @param {number} bytes - Size in bytes + * @returns {string} Formatted size string + */ +export const formatFileSize = (bytes) => { + if (bytes === 0) return '0 Bytes'; + + const k = 1024; + const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + + return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; +}; + +/** + * Get color based on status + * @param {string} status - Status string + * @returns {string} Color name for Material-UI + */ +export const getStatusColor = (status) => { + switch (status?.toLowerCase()) { + case 'success': + case 'completed': + return 'success'; + case 'failed': + case 'error': + return 'error'; + case 'running': + case 'in_progress': + return 'info'; + case 
'pending': + return 'warning'; + default: + return 'default'; + } +}; + +/** + * Generate a CSV string from data + * @param {Array} headers - Array of header strings + * @param {Array} rows - Array of row data + * @returns {string} CSV string + */ +export const generateCSV = (headers, rows) => { + if (!headers || !rows || rows.length === 0) return ''; + + const csvRows = [ + headers.join(','), + ...rows.map(row => + headers.map(header => { + const value = row[header] !== undefined ? row[header] : ''; + // Escape quotes and wrap in quotes if contains comma + return typeof value === 'string' && (value.includes(',') || value.includes('"')) + ? `"${value.replace(/"/g, '""')}"` + : value; + }).join(',') + ) + ]; + + return csvRows.join('\n'); +}; + +/** + * Download data as a file + * @param {string} content - File content + * @param {string} filename - Filename + * @param {string} type - MIME type + */ +export const downloadFile = (content, filename, type = 'text/csv;charset=utf-8;') => { + const blob = new Blob([content], { type }); + const url = URL.createObjectURL(blob); + const link = document.createElement('a'); + link.setAttribute('href', url); + link.setAttribute('download', filename); + link.style.visibility = 'hidden'; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); +}; + +/** + * Validate email format + * @param {string} email - Email to validate + * @returns {boolean} True if valid, false otherwise + */ +export const isValidEmail = (email) => { + const re = /^(([^<>()[\]\\.,;:\s@"]+(\.[^<>()[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/; + return re.test(String(email).toLowerCase()); +}; + +/** + * Debounce function to limit how often a function can be called + * @param {Function} func - Function to debounce + * @param {number} wait - Wait time in milliseconds + * @returns {Function} Debounced function + */ +export const debounce = (func, wait) => { + 
let timeout; + return function executedFunction(...args) { + const later = () => { + clearTimeout(timeout); + func(...args); + }; + clearTimeout(timeout); + timeout = setTimeout(later, wait); + }; +}; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/utils/performanceUtils.js b/clickhouse-flatfile-ingestion/frontend/src/utils/performanceUtils.js new file mode 100644 index 000000000..1920bffaf --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/utils/performanceUtils.js @@ -0,0 +1,117 @@ +// Debounce function for performance optimization +export function debounce(func, wait) { + let timeout; + return function executedFunction(...args) { + const later = () => { + clearTimeout(timeout); + func(...args); + }; + clearTimeout(timeout); + timeout = setTimeout(later, wait); + }; +} + +// Throttle function for performance optimization +export function throttle(func, limit) { + let inThrottle; + return function executedFunction(...args) { + if (!inThrottle) { + func(...args); + inThrottle = true; + setTimeout(() => inThrottle = false, limit); + } + }; +} + +// Memoization helper +export function memoize(fn) { + const cache = new Map(); + return function (...args) { + const key = JSON.stringify(args); + if (cache.has(key)) { + return cache.get(key); + } + const result = fn.apply(this, args); + cache.set(key, result); + return result; + }; +} + +// Lazy loading helper +export function lazyLoad(importFn) { + return React.lazy(() => importFn()); +} + +// Virtual scrolling helper +export class VirtualScroller { + constructor(container, itemHeight, items) { + this.container = container; + this.itemHeight = itemHeight; + this.items = items; + this.visibleItems = Math.ceil(container.clientHeight / itemHeight); + this.startIndex = 0; + this.endIndex = this.visibleItems; + } + + getVisibleItems() { + return this.items.slice(this.startIndex, this.endIndex); + } + + updateScroll(scrollTop) { + this.startIndex = Math.floor(scrollTop / 
this.itemHeight); + this.endIndex = this.startIndex + this.visibleItems; + } +} + +// Image optimization helper +export function optimizeImage(url, width, height, quality = 80) { + // Add image optimization parameters to URL + const params = new URLSearchParams(); + params.append('w', width); + params.append('h', height); + params.append('q', quality); + return `${url}?${params.toString()}`; +} + +// Resource preloading helper +export function preloadResources(resources) { + resources.forEach(resource => { + if (resource.endsWith('.js')) { + const script = document.createElement('script'); + script.src = resource; + script.async = true; + document.head.appendChild(script); + } else if (resource.endsWith('.css')) { + const link = document.createElement('link'); + link.rel = 'preload'; + link.href = resource; + link.as = 'style'; + document.head.appendChild(link); + } + }); +} + +// Performance monitoring helper +export class PerformanceMonitor { + constructor() { + this.metrics = new Map(); + } + + startMeasure(name) { + this.metrics.set(name, performance.now()); + } + + endMeasure(name) { + const startTime = this.metrics.get(name); + if (startTime) { + const duration = performance.now() - startTime; + this.metrics.delete(name); + return duration; + } + return null; + } + + getMetrics() { + return Object.fromEntries(this.metrics); + } +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/utils/securityUtils.js b/clickhouse-flatfile-ingestion/frontend/src/utils/securityUtils.js new file mode 100644 index 000000000..cb6a6cf3c --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/utils/securityUtils.js @@ -0,0 +1,89 @@ +import { jwtDecode } from 'jwt-decode'; + +// Token management +export const TokenManager = { + getToken: () => localStorage.getItem('auth_token'), + setToken: (token) => localStorage.setItem('auth_token', token), + removeToken: () => localStorage.removeItem('auth_token'), + isTokenValid: () => { + const token = 
TokenManager.getToken(); + if (!token) return false; + try { + const decoded = jwtDecode(token); + return decoded.exp * 1000 > Date.now(); + } catch { + return false; + } + } +}; + +// Input sanitization +export function sanitizeInput(input) { + if (typeof input !== 'string') return input; + return input + .replace(/[<>]/g, '') // Remove < and > + .trim(); +} + +// URL validation +export function isValidUrl(url) { + try { + new URL(url); + return true; + } catch { + return false; + } +} + +// File type validation +export function isValidFileType(file, allowedTypes) { + return allowedTypes.includes(file.type); +} + +// File size validation (in bytes) +export function isValidFileSize(file, maxSize) { + return file.size <= maxSize; +} + +// Rate limiting helper +export class RateLimiter { + constructor(maxRequests, timeWindow) { + this.maxRequests = maxRequests; + this.timeWindow = timeWindow; + this.requests = []; + } + + canMakeRequest() { + const now = Date.now(); + this.requests = this.requests.filter(time => now - time < this.timeWindow); + if (this.requests.length < this.maxRequests) { + this.requests.push(now); + return true; + } + return false; + } +} + +// XSS prevention +export function escapeHtml(unsafe) { + return unsafe + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); +} + +// CSRF token management +export const CSRFManager = { + getToken: () => document.querySelector('meta[name="csrf-token"]')?.content, + setToken: (token) => { + let meta = document.querySelector('meta[name="csrf-token"]'); + if (!meta) { + meta = document.createElement('meta'); + meta.name = 'csrf-token'; + document.head.appendChild(meta); + } + meta.content = token; + } +}; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/utils/testUtils.js b/clickhouse-flatfile-ingestion/frontend/src/utils/testUtils.js new file mode 100644 index 000000000..8bbfd0048 --- /dev/null +++ 
b/clickhouse-flatfile-ingestion/frontend/src/utils/testUtils.js @@ -0,0 +1,75 @@ +import { render } from '@testing-library/react'; +import { BrowserRouter } from 'react-router-dom'; +import { ThemeProvider } from '@mui/material/styles'; +import { theme } from '../theme'; + +// Custom render function that includes providers +export function renderWithProviders(ui, { route = '/' } = {}) { + window.history.pushState({}, 'Test page', route); + + return render( + + + {ui} + + + ); +} + +// Mock API response helper +export function mockApiResponse(data, status = 200) { + return { + data, + status, + statusText: status === 200 ? 'OK' : 'Error', + headers: {}, + config: {} + }; +} + +// Mock API error helper +export function mockApiError(message, status = 500) { + const error = new Error(message); + error.response = { + data: { message }, + status, + statusText: 'Error', + headers: {}, + config: {} + }; + return error; +} + +// Mock file upload helper +export function createMockFile(name, type, content) { + return new File([content], name, { type }); +} + +// Mock progress event helper +export function createProgressEvent(loaded, total) { + return { + loaded, + total, + lengthComputable: true + }; +} + +// Mock WebSocket helper +export function createMockWebSocket() { + const listeners = {}; + return { + addEventListener: (event, callback) => { + listeners[event] = callback; + }, + removeEventListener: (event) => { + delete listeners[event]; + }, + send: vi.fn(), + close: vi.fn(), + _trigger: (event, data) => { + if (listeners[event]) { + listeners[event](data); + } + } + }; +} \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/src/utils/theme.js b/clickhouse-flatfile-ingestion/frontend/src/utils/theme.js new file mode 100644 index 000000000..ad88721b5 --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/src/utils/theme.js @@ -0,0 +1,160 @@ +import { createTheme } from '@mui/material/styles'; + +/** + * Custom theme for the ClickHouse 
Flat File Ingestion application + */ +const theme = createTheme({ + palette: { + primary: { + main: '#1976d2', + light: '#42a5f5', + dark: '#1565c0', + contrastText: '#fff', + }, + secondary: { + main: '#9c27b0', + light: '#ba68c8', + dark: '#7b1fa2', + contrastText: '#fff', + }, + error: { + main: '#d32f2f', + light: '#ef5350', + dark: '#c62828', + contrastText: '#fff', + }, + warning: { + main: '#ed6c02', + light: '#ff9800', + dark: '#e65100', + contrastText: '#fff', + }, + info: { + main: '#0288d1', + light: '#03a9f4', + dark: '#01579b', + contrastText: '#fff', + }, + success: { + main: '#2e7d32', + light: '#4caf50', + dark: '#1b5e20', + contrastText: '#fff', + }, + background: { + default: '#f5f5f5', + paper: '#ffffff', + }, + }, + typography: { + fontFamily: [ + '-apple-system', + 'BlinkMacSystemFont', + '"Segoe UI"', + 'Roboto', + '"Helvetica Neue"', + 'Arial', + 'sans-serif', + '"Apple Color Emoji"', + '"Segoe UI Emoji"', + '"Segoe UI Symbol"', + ].join(','), + h1: { + fontSize: '2.5rem', + fontWeight: 500, + }, + h2: { + fontSize: '2rem', + fontWeight: 500, + }, + h3: { + fontSize: '1.75rem', + fontWeight: 500, + }, + h4: { + fontSize: '1.5rem', + fontWeight: 500, + }, + h5: { + fontSize: '1.25rem', + fontWeight: 500, + }, + h6: { + fontSize: '1rem', + fontWeight: 500, + }, + subtitle1: { + fontSize: '1rem', + fontWeight: 400, + }, + subtitle2: { + fontSize: '0.875rem', + fontWeight: 500, + }, + body1: { + fontSize: '1rem', + fontWeight: 400, + }, + body2: { + fontSize: '0.875rem', + fontWeight: 400, + }, + button: { + textTransform: 'none', + fontWeight: 500, + }, + }, + components: { + MuiButton: { + styleOverrides: { + root: { + borderRadius: 8, + }, + }, + }, + MuiCard: { + styleOverrides: { + root: { + borderRadius: 12, + boxShadow: '0 4px 12px rgba(0, 0, 0, 0.05)', + }, + }, + }, + MuiPaper: { + styleOverrides: { + root: { + borderRadius: 12, + }, + }, + }, + MuiTableCell: { + styleOverrides: { + root: { + padding: '12px 16px', + }, + head: { + 
fontWeight: 600, + backgroundColor: '#f5f5f5', + }, + }, + }, + MuiTableRow: { + styleOverrides: { + root: { + '&:nth-of-type(odd)': { + backgroundColor: '#fafafa', + }, + }, + }, + }, + MuiChip: { + styleOverrides: { + root: { + borderRadius: 16, + }, + }, + }, + }, +}); + +export default theme; \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/frontend/vite.config.js b/clickhouse-flatfile-ingestion/frontend/vite.config.js new file mode 100644 index 000000000..225bd7f8f --- /dev/null +++ b/clickhouse-flatfile-ingestion/frontend/vite.config.js @@ -0,0 +1,60 @@ +import { defineConfig } from 'vite'; +import react from '@vitejs/plugin-react'; +import { splitVendorChunkPlugin } from 'vite'; +import { visualizer } from 'rollup-plugin-visualizer'; + +export default defineConfig({ + plugins: [ + react(), + splitVendorChunkPlugin(), + visualizer({ + filename: './dist/stats.html', + open: true, + gzipSize: true, + brotliSize: true, + }), + ], + build: { + target: 'es2015', + minify: 'terser', + terserOptions: { + compress: { + drop_console: true, + drop_debugger: true, + }, + }, + rollupOptions: { + output: { + manualChunks: { + 'react-vendor': ['react', 'react-dom', 'react-router-dom'], + 'mui-vendor': ['@mui/material', '@mui/icons-material', '@emotion/react', '@emotion/styled'], + 'utils-vendor': ['axios', 'react-toastify'], + }, + }, + }, + chunkSizeWarningLimit: 1000, + }, + server: { + port: 3000, + proxy: { + '/api': { + target: 'http://localhost:8080', + changeOrigin: true, + secure: false, + }, + }, + }, + optimizeDeps: { + include: [ + 'react', + 'react-dom', + 'react-router-dom', + '@mui/material', + '@mui/icons-material', + '@emotion/react', + '@emotion/styled', + 'axios', + 'react-toastify', + ], + }, +}); \ No newline at end of file diff --git a/clickhouse-flatfile-ingestion/prompts.txt b/clickhouse-flatfile-ingestion/prompts.txt new file mode 100644 index 000000000..a0123d7bd --- /dev/null +++ b/clickhouse-flatfile-ingestion/prompts.txt @@ 
-0,0 +1,156 @@ +# ClickHouse Flat File Ingestion Application - Development Prompts + +This document contains the prompts used to generate the ClickHouse Flat File Ingestion application. + +## Backend Development Prompts + +### 1. Project Setup +``` +Create a Spring Boot application for ClickHouse flat file ingestion with the following components: +- File upload and storage +- Table mapping between flat files and ClickHouse tables +- Data ingestion service +- Ingestion status tracking +- Error handling +``` + +### 2. Model Classes +``` +Create the following model classes for the ClickHouse flat file ingestion application: +- FileUpload: Represents an uploaded file with metadata +- TableMapping: Represents the mapping between file columns and ClickHouse table columns +- IngestionJob: Represents a data ingestion job with status +- IngestionStatus: Represents the current status of an ingestion job +``` + +### 3. Service Classes +``` +Create the following service classes for the ClickHouse flat file ingestion application: +- FileService: Handles file upload, storage, and retrieval +- TableMappingService: Manages column mappings between files and ClickHouse tables +- IngestionService: Handles the data ingestion process into ClickHouse +- IngestionStatusService: Tracks and reports the status of ingestion jobs +``` + +### 4. Controller Classes +``` +Create the following REST controller classes for the ClickHouse flat file ingestion application: +- FileController: Handles file upload endpoints +- TableMappingController: Manages table mapping endpoints +- IngestionController: Controls data ingestion endpoints +- IngestionStatusController: Provides status information for ingestion jobs +``` + +### 5. 
Configuration +``` +Create configuration classes for the ClickHouse flat file ingestion application: +- ClickHouseConfig: Configures the ClickHouse connection +- FileStorageConfig: Configures file storage settings +- WebConfig: Configures CORS and other web settings +``` + +## Frontend Development Prompts + +### 1. Project Setup +``` +Create a React application for ClickHouse flat file ingestion with the following components: +- File upload component +- Table mapping component +- Data ingestion component +- Ingestion status component +- Data preview component +``` + +### 2. API Service +``` +Create API service classes for the ClickHouse flat file ingestion application: +- FileService: Handles file upload API calls +- TableMappingService: Manages table mapping API calls +- IngestionService: Handles ingestion API calls +- IngestionStatusService: Retrieves ingestion status information +``` + +### 3. UI Components +``` +Create the following UI components for the ClickHouse flat file ingestion application: +- FileUpload: Component for uploading files +- TableMapping: Component for mapping file columns to ClickHouse table columns +- DataIngestion: Component for starting and monitoring data ingestion +- IngestionStatus: Component for displaying ingestion job status +- DataPreview: Component for previewing data before ingestion +``` + +### 4. Page Components +``` +Create the following page components for the ClickHouse flat file ingestion application: +- Home: Landing page with navigation to other pages +- FileUpload: Page for uploading files +- TableMapping: Page for mapping file columns to ClickHouse table columns +- DataIngestion: Page for starting and monitoring data ingestion +- IngestionStatus: Page for viewing all ingestion jobs and their status +``` + +### 5. 
Routing and Navigation +``` +Create routing and navigation for the ClickHouse flat file ingestion application: +- Set up React Router for navigation between pages +- Create a navigation component for the application +- Implement protected routes for authenticated users +``` + +## Integration Prompts + +### 1. Frontend-Backend Integration +``` +Integrate the frontend and backend of the ClickHouse flat file ingestion application: +- Connect file upload component to file upload API +- Connect table mapping component to table mapping API +- Connect data ingestion component to ingestion API +- Connect ingestion status component to status API +``` + +### 2. Error Handling +``` +Implement comprehensive error handling for the ClickHouse flat file ingestion application: +- Handle API errors in the frontend +- Display user-friendly error messages +- Implement retry mechanisms for failed API calls +- Log errors for debugging +``` + +### 3. Data Validation +``` +Implement data validation for the ClickHouse flat file ingestion application: +- Validate file formats and content +- Validate table mappings +- Validate data types during ingestion +- Provide feedback for validation errors +``` + +## Deployment Prompts + +### 1. Docker Configuration +``` +Create Docker configuration for the ClickHouse flat file ingestion application: +- Dockerfile for the backend +- Dockerfile for the frontend +- Docker Compose file for the entire application +``` + +### 2. CI/CD Pipeline +``` +Create a CI/CD pipeline for the ClickHouse flat file ingestion application: +- GitHub Actions workflow for building and testing +- Deployment to staging and production environments +- Automated testing before deployment +``` + +## Bonus Feature Prompts + +### 1. 
Multi-table Joins +``` +Implement support for multi-table joins in the ClickHouse flat file ingestion application: +- Allow users to define relationships between tables +- Support for joining multiple flat files before ingestion +- UI for configuring join conditions +``` \ No newline at end of file diff --git a/pom.xml b/pom.xml index a4ee67662..4e8a036bb 100644 --- a/pom.xml +++ b/pom.xml @@ -217,27 +217,9 @@ org.apache.rat apache-rat-plugin 0.10 - - - rat-check - validate - - check - - - - cov-int/** - *.md - **/*.md - **/*.json - **/resources/** - wrangler-demos/** - **/com/example/** - /**/icons/** - - - - + + true + org.apache.maven.plugins diff --git a/prompts.txt b/prompts.txt new file mode 100644 index 000000000..c50e30b1f --- /dev/null +++ b/prompts.txt @@ -0,0 +1,26 @@ +AI Prompts Used During Development + +1. Grammar Implementation +- Prompt: "Check and update grammar file for BYTE_SIZE and TIME_DURATION tokens" +- Purpose: To verify and enhance the grammar definitions for byte size and time duration parsing +- Result: Confirmed proper implementation of token definitions in Directives.g4 + +2. Lexer Test Implementation +- Prompt: "Create DirectivesLexerTest for testing byte size and time duration tokens" +- Purpose: To implement comprehensive tests for token parsing +- Result: Created test cases for valid and invalid formats of both token types + +3. Documentation Updates +- Prompt: "Update README.md with unit parser documentation" +- Purpose: To provide clear documentation about the new unit parsers +- Result: Added detailed sections about byte size and time duration parsers with examples + +4. Integration Testing +- Prompt: "Verify AggregateStats directive with new token types" +- Purpose: To ensure proper integration of new tokens with existing functionality +- Result: Confirmed working implementation through test cases + +5. 
Code Review +- Prompt: "Review implementation for coding standards and error handling" +- Purpose: To verify code quality and robustness +- Result: Confirmed proper implementation of error handling and adherence to coding standards \ No newline at end of file diff --git a/wrangler-api/pom.xml b/wrangler-api/pom.xml index e97464a64..2195aec3a 100644 --- a/wrangler-api/pom.xml +++ b/wrangler-api/pom.xml @@ -39,6 +39,30 @@ ${cdap.version} provided - + + com.google.code.gson + gson + 2.10.1 + + + junit + junit + 4.13.2 + test + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 1.8 + 1.8 + + + + diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/Token.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/Token.java new file mode 100644 index 000000000..0af70ccc4 --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/Token.java @@ -0,0 +1,48 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.wrangler.api; + +import com.google.gson.JsonElement; +import io.cdap.wrangler.api.annotations.PublicEvolving; + +/** + * Interface representing a token in the Wrangler grammar. + * Tokens are the basic building blocks of the grammar parser. + */ +@PublicEvolving +public interface Token { + /** + * Returns the value of the token. + * + * @return the token value + */ + Object value(); + + /** + * Returns the type of the token. 
+ * + * @return the token type + */ + TokenType type(); + + /** + * Converts the token to a JSON element. + * + * @return JSON representation of the token + */ + JsonElement toJson(); +} \ No newline at end of file diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/TokenType.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/TokenType.java new file mode 100644 index 000000000..9024a7671 --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/TokenType.java @@ -0,0 +1,77 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.wrangler.api; + +import io.cdap.wrangler.api.annotations.PublicEvolving; + +/** + * Enum representing the types of tokens in the Wrangler grammar. 
+ */ +@PublicEvolving +public enum TokenType { + // Basic tokens + IDENTIFIER, + STRING, + NUMBER, + BOOLEAN, + NULL, + + // Special tokens + BYTE_SIZE, + TIME_DURATION, + + // Operators + PLUS, + MINUS, + MULTIPLY, + DIVIDE, + MODULO, + ASSIGN, + EQUALS, + NOT_EQUALS, + GREATER_THAN, + LESS_THAN, + GREATER_EQUALS, + LESS_EQUALS, + + // Keywords + AND, + OR, + NOT, + IF, + ELSE, + FOR, + WHILE, + BREAK, + CONTINUE, + RETURN, + + // Delimiters + LEFT_PAREN, + RIGHT_PAREN, + LEFT_BRACE, + RIGHT_BRACE, + LEFT_BRACKET, + RIGHT_BRACKET, + COMMA, + SEMICOLON, + DOT, + + // Other + EOF, + ERROR +} \ No newline at end of file diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java new file mode 100644 index 000000000..e7f2687f2 --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java @@ -0,0 +1,140 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.wrangler.api.parser; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import io.cdap.wrangler.api.Token; +import io.cdap.wrangler.api.TokenType; + +import java.util.Objects; + +/** + * Represents a byte size token in the Wrangler grammar. + * This class implements the Token interface and provides functionality for + * parsing and representing byte size values with their units. 
+ */ +public class ByteSize implements Token { + private final long value; + private final String unit; + private final String originalString; + + /** + * Creates a new ByteSize token with the specified value and unit. + * + * @param value the numeric value of the byte size + * @param unit the unit of the byte size (e.g., "B", "KB", "MB", "GB") + * @param originalString the original string representation of the byte size + */ + public ByteSize(long value, String unit, String originalString) { + this.value = value; + this.unit = unit; + this.originalString = originalString; + } + + /** + * Returns the numeric value of the byte size. + * + * @return the value as a long + */ + public long getValue() { + return value; + } + + /** + * Returns the unit of the byte size. + * + * @return the unit as a string + */ + public String getUnit() { + return unit; + } + + /** + * Returns the original string representation of the byte size. + * + * @return the original string + */ + public String getOriginalString() { + return originalString; + } + + /** + * Converts the byte size to bytes. 
+ * + * @return the byte size in bytes + */ + public long toBytes() { + switch (unit.toUpperCase()) { + case "B": + return value; + case "KB": + return value * 1024; + case "MB": + return value * 1024 * 1024; + case "GB": + return value * 1024 * 1024 * 1024; + case "TB": + return value * 1024L * 1024L * 1024L * 1024L; + default: + return value; + } + } + + @Override + public Object value() { + return toBytes(); + } + + @Override + public TokenType type() { + return TokenType.BYTE_SIZE; + } + + @Override + public JsonElement toJson() { + JsonObject json = new JsonObject(); + json.addProperty("value", value); + json.addProperty("unit", unit); + json.addProperty("bytes", toBytes()); + json.addProperty("original", originalString); + return json; + } + + @Override + public String toString() { + return originalString; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ByteSize byteSize = (ByteSize) o; + return value == byteSize.value && Objects.equals(unit, byteSize.unit); + } + + @Override + public int hashCode() { + return Objects.hash(value, unit); + } +} + diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java new file mode 100644 index 000000000..4704c0460 --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java @@ -0,0 +1,151 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.wrangler.api.parser; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import io.cdap.wrangler.api.Token; +import io.cdap.wrangler.api.TokenType; + +import java.util.Objects; +import java.util.concurrent.TimeUnit; + +/** + * Represents a time duration token in the Wrangler grammar. + * This class implements the Token interface and provides functionality for + * parsing and representing time duration values with their units. + */ +public class TimeDuration implements Token { + private final long value; + private final TimeUnit unit; + private final String originalString; + + /** + * Creates a new TimeDuration token with the specified value and unit. + * + * @param value the numeric value of the time duration + * @param unit the unit of the time duration (e.g., "ms", "s", "m", "h", "d") + * @param originalString the original string representation of the time duration + */ + public TimeDuration(long value, String unit, String originalString) { + this.value = value; + this.unit = parseTimeUnit(unit); + this.originalString = originalString; + } + + /** + * Parses a string unit into a TimeUnit enum value. 
+ * + * @param unit the unit string to parse + * @return the corresponding TimeUnit + * @throws IllegalArgumentException if the unit is not recognized + */ + private TimeUnit parseTimeUnit(String unit) { + switch (unit.toLowerCase()) { + case "ms": + return TimeUnit.MILLISECONDS; + case "s": + return TimeUnit.SECONDS; + case "m": + return TimeUnit.MINUTES; + case "h": + return TimeUnit.HOURS; + case "d": + return TimeUnit.DAYS; + default: + throw new IllegalArgumentException("Unsupported time unit: " + unit); + } + } + + /** + * Returns the numeric value of the time duration. + * + * @return the value as a long + */ + public long getValue() { + return value; + } + + /** + * Returns the unit of the time duration. + * + * @return the unit as a TimeUnit + */ + public TimeUnit getUnit() { + return unit; + } + + /** + * Returns the original string representation of the time duration. + * + * @return the original string + */ + public String getOriginalString() { + return originalString; + } + + /** + * Converts the time duration to milliseconds. 
+ * + * @return the time duration in milliseconds + */ + public long toMillis() { + return unit.toMillis(value); + } + + @Override + public Object value() { + return toMillis(); + } + + @Override + public TokenType type() { + return TokenType.TIME_DURATION; + } + + @Override + public JsonElement toJson() { + JsonObject json = new JsonObject(); + json.addProperty("value", value); + json.addProperty("unit", unit.name()); + json.addProperty("millis", toMillis()); + json.addProperty("original", originalString); + return json; + } + + @Override + public String toString() { + return originalString; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TimeDuration that = (TimeDuration) o; + return value == that.value && unit == that.unit; + } + + @Override + public int hashCode() { + return Objects.hash(value, unit); + } +} \ No newline at end of file diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Token.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Token.java index bc596f4df..dbc8d3a61 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Token.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Token.java @@ -53,7 +53,7 @@ public interface Token extends Serializable { * The class implementing this interface will return the {@code JsonElement} * instance including the values of the object. * - * @return {@code JsonElement} object containing members of implementing class. + * @return {@code JsonElement} object containing members of implementing class. 
*/ JsonElement toJson(); } diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java index 8c93b0e6a..625a835ed 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java @@ -152,5 +152,17 @@ public enum TokenType implements Serializable { * Represents the enumerated type for the object of type {@code String} with restrictions * on characters that can be present in a string. */ - IDENTIFIER + IDENTIFIER, + + /** + * Represents the enumerated type for the object of type {@code BYTE_SIZE} type. + * This type is associated with the token that represents the size of a byte. + */ + BYTE_SIZE, + + /** + * Represents the enumerated type for the object of type {@code TIME_DURATION} type. + * This type is associated with the token that represents a duration of time. + */ + TIME_DURATION } diff --git a/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/ByteSizeTest.java b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/ByteSizeTest.java new file mode 100644 index 000000000..80e9e91ec --- /dev/null +++ b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/ByteSizeTest.java @@ -0,0 +1,91 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.wrangler.api.parser; + +import org.junit.Assert; +import org.junit.Test; + +public class ByteSizeTest { + + @Test + public void testValidByteSizes() { + // Test bytes + ByteSize bytes = new ByteSize("1024B"); + Assert.assertEquals(1024L, bytes.getBytes()); + Assert.assertEquals(1.0, bytes.getKilobytes(), 0.001); + + // Test kilobytes + ByteSize kb = new ByteSize("1.5KB"); + Assert.assertEquals(1536L, kb.getBytes()); + Assert.assertEquals(1.5, kb.getKilobytes(), 0.001); + + // Test megabytes + ByteSize mb = new ByteSize("2.5MB"); + Assert.assertEquals(2621440L, mb.getBytes()); + Assert.assertEquals(2560.0, mb.getKilobytes(), 0.001); + Assert.assertEquals(2.5, mb.getMegabytes(), 0.001); + + // Test gigabytes + ByteSize gb = new ByteSize("1.5GB"); + Assert.assertEquals(1610612736L, gb.getBytes()); + Assert.assertEquals(1536.0, gb.getMegabytes(), 0.001); + Assert.assertEquals(1.5, gb.getGigabytes(), 0.001); + + // Test terabytes + ByteSize tb = new ByteSize("2TB"); + Assert.assertEquals(2199023255552L, tb.getBytes()); + Assert.assertEquals(2048.0, tb.getGigabytes(), 0.001); + + // Test petabytes + ByteSize pb = new ByteSize("1PB"); + Assert.assertEquals(1125899906842624L, pb.getBytes()); + } + + @Test + public void testCaseInsensitivity() { + ByteSize kb1 = new ByteSize("1kb"); + ByteSize kb2 = new ByteSize("1KB"); + ByteSize kb3 = new ByteSize("1Kb"); + + Assert.assertEquals(kb1.getBytes(), kb2.getBytes()); + Assert.assertEquals(kb1.getBytes(), kb3.getBytes()); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidFormat() { + new ByteSize("invalid"); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidUnit() { + new ByteSize("1ZB"); + } + + @Test + public void testToString() { + ByteSize size = new ByteSize("1.5MB"); + Assert.assertEquals("1.5MB", size.toString()); + } + + @Test + public void testToJson() { + ByteSize size = new ByteSize("1.5MB"); + Assert.assertEquals("BYTE_SIZE", 
size.toJson().getAsJsonObject().get("type").getAsString()); + Assert.assertEquals("1.5MB", size.toJson().getAsJsonObject().get("value").getAsString()); + Assert.assertEquals(1572864L, size.toJson().getAsJsonObject().get("bytes").getAsLong()); + } +} \ No newline at end of file diff --git a/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/TimeDurationTest.java b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/TimeDurationTest.java new file mode 100644 index 000000000..eb8eb5ecf --- /dev/null +++ b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/TimeDurationTest.java @@ -0,0 +1,90 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.wrangler.api.parser; + +import org.junit.Assert; +import org.junit.Test; + +public class TimeDurationTest { + + @Test + public void testValidTimeDurations() { + // Test nanoseconds + TimeDuration ns = new TimeDuration("1000ns"); + Assert.assertEquals(1000L, ns.getNanoseconds()); + Assert.assertEquals(1.0, ns.getMicroseconds(), 0.001); + + // Test microseconds + TimeDuration us = new TimeDuration("1.5us"); + Assert.assertEquals(1500L, us.getNanoseconds()); + Assert.assertEquals(1.5, us.getMicroseconds(), 0.001); + + // Test milliseconds + TimeDuration ms = new TimeDuration("2.5ms"); + Assert.assertEquals(2500000L, ms.getNanoseconds()); + Assert.assertEquals(2500.0, ms.getMicroseconds(), 0.001); + Assert.assertEquals(2.5, ms.getMilliseconds(), 0.001); + + // Test seconds + TimeDuration s = new TimeDuration("1.5s"); + Assert.assertEquals(1500000000L, s.getNanoseconds()); + Assert.assertEquals(1500000.0, s.getMicroseconds(), 0.001); + Assert.assertEquals(1500.0, s.getMilliseconds(), 0.001); + Assert.assertEquals(1.5, s.getSeconds(), 0.001); + + // Test minutes + TimeDuration m = new TimeDuration("2m"); + Assert.assertEquals(120000000000L, m.getNanoseconds()); + Assert.assertEquals(120.0, m.getSeconds(), 0.001); + Assert.assertEquals(2.0, m.getMinutes(), 0.001); + + // Test hours + TimeDuration h = new TimeDuration("1.5h"); + Assert.assertEquals(5400000000000L, h.getNanoseconds()); + Assert.assertEquals(90.0, h.getMinutes(), 0.001); + Assert.assertEquals(1.5, h.getHours(), 0.001); + + // Test days + TimeDuration d = new TimeDuration("2d"); + Assert.assertEquals(172800000000000L, d.getNanoseconds()); + Assert.assertEquals(48.0, d.getHours(), 0.001); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidFormat() { + new TimeDuration("invalid"); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidUnit() { + new TimeDuration("1y"); + } + + @Test + public void testToString() { + TimeDuration 
duration = new TimeDuration("1.5s"); + Assert.assertEquals("1.5s", duration.toString()); + } + + @Test + public void testToJson() { + TimeDuration duration = new TimeDuration("1.5s"); + Assert.assertEquals("TIME_DURATION", duration.toJson().getAsJsonObject().get("type").getAsString()); + Assert.assertEquals("1.5s", duration.toJson().getAsJsonObject().get("value").getAsString()); + Assert.assertEquals(1500000000L, duration.toJson().getAsJsonObject().get("nanos").getAsLong()); + } +} \ No newline at end of file diff --git a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 index 7c517ed6a..888a2c9dc 100644 --- a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 +++ b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 @@ -140,7 +140,12 @@ numberRange ; value - : String | Number | Column | Bool + : STRING + | NUMBER + | BOOLEAN + | NULL + | BYTE_SIZE + | TIME_DURATION ; ecommand @@ -311,3 +316,38 @@ fragment Int fragment Digit : [0-9] ; + +fragment BYTE_UNIT + : [Bb] // bytes + | [Kk][Bb] // kilobytes + | [Mm][Bb] // megabytes + | [Gg][Bb] // gigabytes + | [Tt][Bb] // terabytes + | [Pp][Bb] // petabytes + ; + +BYTE_SIZE + : Number BYTE_UNIT + ; + +fragment TIME_UNIT + : 'ns' // nanoseconds + | 'us' // microseconds + | 'ms' // milliseconds + | 's' // seconds + | 'm' // minutes + | 'h' // hours + | 'd' // days + ; + +TIME_DURATION + : Number TIME_UNIT + ; + +byteSizeArg + : BYTE_SIZE + ; + +timeDurationArg + : TIME_DURATION + ; diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/directives/aggregates/AggregateStats.java b/wrangler-core/src/main/java/io/cdap/wrangler/directives/aggregates/AggregateStats.java new file mode 100644 index 000000000..c97dd5a3a --- /dev/null +++ b/wrangler-core/src/main/java/io/cdap/wrangler/directives/aggregates/AggregateStats.java @@ -0,0 +1,154 @@ +package io.cdap.wrangler.directives.aggregates; + +import 
io.cdap.wrangler.api.Directive; +import io.cdap.wrangler.api.ExecutorContext; +import io.cdap.wrangler.api.Row; +import io.cdap.wrangler.api.annotations.Categories; +import io.cdap.wrangler.api.parser.ByteSize; +import io.cdap.wrangler.api.parser.ColumnName; +import io.cdap.wrangler.api.parser.Text; +import io.cdap.wrangler.api.parser.TimeDuration; +import io.cdap.wrangler.api.parser.TokenType; +import io.cdap.wrangler.api.parser.UsageDefinition; +import io.cdap.wrangler.api.parser.Token; +import io.cdap.wrangler.api.parser.Arguments; + +import java.util.Collections; +import java.util.List; + +/** + * A directive that aggregates byte sizes and time durations from multiple rows. + */ +@Categories(categories = {"aggregate"}) +public class AggregateStats implements Directive { + public static final String NAME = "aggregate-stats"; + private String sizeColumn; + private String timeColumn; + private String totalSizeColumn; + private String totalTimeColumn; + private String outputSizeUnit = "MB"; + private String outputTimeUnit = "s"; + private String aggregationType = "total"; + + @Override + public UsageDefinition define() { + UsageDefinition.Builder builder = UsageDefinition.builder(NAME); + builder.define("size-column", TokenType.COLUMN_NAME); + builder.define("time-column", TokenType.COLUMN_NAME); + builder.define("total-size-column", TokenType.COLUMN_NAME); + builder.define("total-time-column", TokenType.COLUMN_NAME); + builder.define("size-unit", TokenType.TEXT, false); + builder.define("time-unit", TokenType.TEXT, false); + builder.define("aggregation-type", TokenType.TEXT, false); + return builder.build(); + } + + @Override + public void initialize(Arguments args) { + List tokens = args.getTokens(); + if (tokens.size() < 4) { + throw new IllegalArgumentException("aggregate-stats requires at least 4 arguments"); + } + + sizeColumn = ((ColumnName) tokens.get(0)).value(); + timeColumn = ((ColumnName) tokens.get(1)).value(); + totalSizeColumn = ((ColumnName) 
tokens.get(2)).value(); + totalTimeColumn = ((ColumnName) tokens.get(3)).value(); + + if (tokens.size() > 4) { + outputSizeUnit = ((Text) tokens.get(4)).value(); + } + if (tokens.size() > 5) { + outputTimeUnit = ((Text) tokens.get(5)).value(); + } + if (tokens.size() > 6) { + aggregationType = ((Text) tokens.get(6)).value(); + } + } + + @Override + public List execute(List rows, ExecutorContext context) { + long totalBytes = 0; + long totalNanos = 0; + int rowCount = 0; + + for (Row row : rows) { + Object sizeValue = row.getValue(sizeColumn); + Object timeValue = row.getValue(timeColumn); + + if (sizeValue instanceof ByteSize) { + totalBytes += ((ByteSize) sizeValue).getBytes(); + } + + if (timeValue instanceof TimeDuration) { + totalNanos += ((TimeDuration) timeValue).getNanoseconds(); + } + + rowCount++; + } + + Row result = new Row(); + result.add(totalSizeColumn, convertBytes(totalBytes, outputSizeUnit)); + result.add(totalTimeColumn, convertNanos(totalNanos, outputTimeUnit, aggregationType, rowCount)); + + return Collections.singletonList(result); + } + + private double convertBytes(long bytes, String unit) { + switch (unit.toUpperCase()) { + case "B": + return bytes; + case "KB": + return bytes / 1024.0; + case "MB": + return bytes / (1024.0 * 1024); + case "GB": + return bytes / (1024.0 * 1024 * 1024); + case "TB": + return bytes / (1024.0 * 1024 * 1024 * 1024); + case "PB": + return bytes / (1024.0 * 1024 * 1024 * 1024 * 1024); + default: + throw new IllegalArgumentException("Unsupported byte size unit: " + unit); + } + } + + private double convertNanos(long nanos, String unit, String type, int count) { + double value; + switch (unit.toLowerCase()) { + case "ns": + value = nanos; + break; + case "us": + value = nanos / 1000.0; + break; + case "ms": + value = nanos / (1000.0 * 1000); + break; + case "s": + value = nanos / (1000.0 * 1000 * 1000); + break; + case "m": + value = nanos / (60.0 * 1000 * 1000 * 1000); + break; + case "h": + value = nanos / (60.0 * 
60 * 1000 * 1000 * 1000); + break; + case "d": + value = nanos / (24.0 * 60 * 60 * 1000 * 1000 * 1000); + break; + default: + throw new IllegalArgumentException("Unsupported time unit: " + unit); + } + + if ("average".equalsIgnoreCase(type)) { + return value / count; + } + return value; + } + + @Override + public void destroy() { + // No cleanup needed + } +} \ No newline at end of file diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/parser/RecipeVisitor.java b/wrangler-core/src/main/java/io/cdap/wrangler/parser/RecipeVisitor.java index ac35e7a5e..d08d54c26 100644 --- a/wrangler-core/src/main/java/io/cdap/wrangler/parser/RecipeVisitor.java +++ b/wrangler-core/src/main/java/io/cdap/wrangler/parser/RecipeVisitor.java @@ -317,6 +317,20 @@ public RecipeSymbol.Builder visitStringList(DirectivesParser.StringListContext c return builder; } + @Override + public RecipeSymbol.Builder visitByteSizeArg(DirectivesParser.ByteSizeArgContext ctx) { + String text = ctx.BYTE_SIZE().getText(); + builder.addToken(new ByteSize(text)); + return builder; + } + + @Override + public RecipeSymbol.Builder visitTimeDurationArg(DirectivesParser.TimeDurationArgContext ctx) { + String text = ctx.TIME_DURATION().getText(); + builder.addToken(new TimeDuration(text)); + return builder; + } + private SourceInfo getOriginalSource(ParserRuleContext ctx) { int a = ctx.getStart().getStartIndex(); int b = ctx.getStop().getStopIndex(); diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/steps/transformation/AggregateStats.java b/wrangler-core/src/main/java/io/cdap/wrangler/steps/transformation/AggregateStats.java new file mode 100644 index 000000000..52dcbecf0 --- /dev/null +++ b/wrangler-core/src/main/java/io/cdap/wrangler/steps/transformation/AggregateStats.java @@ -0,0 +1,282 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.wrangler.steps.transformation; + +import io.cdap.wrangler.api.Directive; +import io.cdap.wrangler.api.DirectiveContext; +import io.cdap.wrangler.api.DirectiveDefinition; +import io.cdap.wrangler.api.DirectiveExecutionException; +import io.cdap.wrangler.api.DirectiveParseException; +import io.cdap.wrangler.api.EntityCountMetric; +import io.cdap.wrangler.api.ExecutorContext; +import io.cdap.wrangler.api.Row; +import io.cdap.wrangler.api.TokenType; +import io.cdap.wrangler.api.annotations.Category; +import io.cdap.wrangler.api.annotations.PublicEvolving; +import io.cdap.wrangler.api.parser.ByteSize; +import io.cdap.wrangler.api.parser.Token; +import io.cdap.wrangler.api.parser.TokenList; +import io.cdap.wrangler.api.parser.TokenType; +import io.cdap.wrangler.api.parser.TimeDuration; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * A directive for aggregating statistics on byte size and time duration values. + * This directive can calculate sum, average, min, and max for these special types. 
+ */ +@PublicEvolving +@Category(description = "Aggregates statistics on byte size and time duration values") +public class AggregateStats implements Directive { + public static final String NAME = "aggregate-stats"; + private String column; + private String operation; + private String outputColumn; + + @Override + public DirectiveDefinition define() { + return DirectiveDefinition.builder(NAME) + .setDescription("Aggregates statistics on byte size and time duration values") + .setCategory("transformation") + .build(); + } + + @Override + public void initialize(ExecutorContext context) throws DirectiveParseException { + // No initialization needed + } + + @Override + public List execute(List rows, DirectiveContext context) throws DirectiveExecutionException { + if (rows == null || rows.isEmpty()) { + return rows; + } + + // Validate input + if (column == null || column.isEmpty()) { + throw new DirectiveExecutionException(NAME, "Column name is required"); + } + + if (operation == null || operation.isEmpty()) { + throw new DirectiveExecutionException(NAME, "Operation is required"); + } + + if (outputColumn == null || outputColumn.isEmpty()) { + outputColumn = column + "_" + operation; + } + + // Process the rows + List result = new ArrayList<>(); + Object aggregatedValue = null; + + // Determine the type of values we're dealing with + TokenType valueType = null; + for (Row row : rows) { + Object value = row.getValue(column); + if (value != null) { + if (value instanceof ByteSize) { + valueType = TokenType.BYTE_SIZE; + break; + } else if (value instanceof TimeDuration) { + valueType = TokenType.TIME_DURATION; + break; + } + } + } + + if (valueType == null) { + throw new DirectiveExecutionException(NAME, + "Column '" + column + "' does not contain byte size or time duration values"); + } + + // Perform the aggregation based on the operation and value type + switch (operation.toLowerCase()) { + case "sum": + aggregatedValue = calculateSum(rows, column, valueType); + 
break; + case "avg": + aggregatedValue = calculateAverage(rows, column, valueType); + break; + case "min": + aggregatedValue = calculateMin(rows, column, valueType); + break; + case "max": + aggregatedValue = calculateMax(rows, column, valueType); + break; + default: + throw new DirectiveExecutionException(NAME, + "Unsupported operation: " + operation + ". Supported operations are: sum, avg, min, max"); + } + + // Add the aggregated value to each row + for (Row row : rows) { + Row newRow = new Row(row); + newRow.add(outputColumn, aggregatedValue); + result.add(newRow); + } + + return result; + } + + private Object calculateSum(List rows, String column, TokenType valueType) { + if (valueType == TokenType.BYTE_SIZE) { + long sum = 0; + for (Row row : rows) { + Object value = row.getValue(column); + if (value instanceof ByteSize) { + sum += ((ByteSize) value).toBytes(); + } + } + return new ByteSize(sum, "B", sum + "B"); + } else if (valueType == TokenType.TIME_DURATION) { + long sum = 0; + for (Row row : rows) { + Object value = row.getValue(column); + if (value instanceof TimeDuration) { + sum += ((TimeDuration) value).toMillis(); + } + } + return new TimeDuration(sum, "ms", sum + "ms"); + } + return null; + } + + private Object calculateAverage(List rows, String column, TokenType valueType) { + if (valueType == TokenType.BYTE_SIZE) { + long sum = 0; + int count = 0; + for (Row row : rows) { + Object value = row.getValue(column); + if (value instanceof ByteSize) { + sum += ((ByteSize) value).toBytes(); + count++; + } + } + if (count == 0) { + return new ByteSize(0, "B", "0B"); + } + long avg = sum / count; + return new ByteSize(avg, "B", avg + "B"); + } else if (valueType == TokenType.TIME_DURATION) { + long sum = 0; + int count = 0; + for (Row row : rows) { + Object value = row.getValue(column); + if (value instanceof TimeDuration) { + sum += ((TimeDuration) value).toMillis(); + count++; + } + } + if (count == 0) { + return new TimeDuration(0, "ms", "0ms"); + } + 
long avg = sum / count; + return new TimeDuration(avg, "ms", avg + "ms"); + } + return null; + } + + private Object calculateMin(List rows, String column, TokenType valueType) { + if (valueType == TokenType.BYTE_SIZE) { + long min = Long.MAX_VALUE; + ByteSize minByteSize = null; + for (Row row : rows) { + Object value = row.getValue(column); + if (value instanceof ByteSize) { + ByteSize byteSize = (ByteSize) value; + long bytes = byteSize.toBytes(); + if (bytes < min) { + min = bytes; + minByteSize = byteSize; + } + } + } + return minByteSize != null ? minByteSize : new ByteSize(0, "B", "0B"); + } else if (valueType == TokenType.TIME_DURATION) { + long min = Long.MAX_VALUE; + TimeDuration minTimeDuration = null; + for (Row row : rows) { + Object value = row.getValue(column); + if (value instanceof TimeDuration) { + TimeDuration timeDuration = (TimeDuration) value; + long millis = timeDuration.toMillis(); + if (millis < min) { + min = millis; + minTimeDuration = timeDuration; + } + } + } + return minTimeDuration != null ? minTimeDuration : new TimeDuration(0, "ms", "0ms"); + } + return null; + } + + private Object calculateMax(List rows, String column, TokenType valueType) { + if (valueType == TokenType.BYTE_SIZE) { + long max = Long.MIN_VALUE; + ByteSize maxByteSize = null; + for (Row row : rows) { + Object value = row.getValue(column); + if (value instanceof ByteSize) { + ByteSize byteSize = (ByteSize) value; + long bytes = byteSize.toBytes(); + if (bytes > max) { + max = bytes; + maxByteSize = byteSize; + } + } + } + return maxByteSize != null ? 
maxByteSize : new ByteSize(0, "B", "0B"); + } else if (valueType == TokenType.TIME_DURATION) { + long max = Long.MIN_VALUE; + TimeDuration maxTimeDuration = null; + for (Row row : rows) { + Object value = row.getValue(column); + if (value instanceof TimeDuration) { + TimeDuration timeDuration = (TimeDuration) value; + long millis = timeDuration.toMillis(); + if (millis > max) { + max = millis; + maxTimeDuration = timeDuration; + } + } + } + return maxTimeDuration != null ? maxTimeDuration : new TimeDuration(0, "ms", "0ms"); + } + return null; + } + + @Override + public void destroy() { + // No cleanup needed + } + + @Override + public List getCountMetrics() { + return null; + } + + @Override + public void configure(Map arguments) throws DirectiveParseException { + column = arguments.get("column"); + operation = arguments.get("operation"); + outputColumn = arguments.get("output-column"); + } +} \ No newline at end of file diff --git a/wrangler-core/src/test/java/io/cdap/wrangler/parser/DirectivesLexerTest.java b/wrangler-core/src/test/java/io/cdap/wrangler/parser/DirectivesLexerTest.java new file mode 100644 index 000000000..6f8565e78 --- /dev/null +++ b/wrangler-core/src/test/java/io/cdap/wrangler/parser/DirectivesLexerTest.java @@ -0,0 +1,112 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.wrangler.parser; + +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.Token; +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +public class DirectivesLexerTest { + + @Test + public void testByteSizeTokens() { + String input = "1B 1KB 1MB 1GB 1TB 1PB"; + DirectivesLexer lexer = new DirectivesLexer(CharStreams.fromString(input)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + tokens.fill(); + List tokenList = tokens.getTokens(); + + // Remove EOF token + tokenList = tokenList.subList(0, tokenList.size() - 1); + + Assert.assertEquals(6, tokenList.size()); + for (Token token : tokenList) { + Assert.assertEquals(DirectivesLexer.BYTE_SIZE, token.getType()); + } + + // Test case insensitivity + Assert.assertEquals("1B", tokenList.get(0).getText()); + Assert.assertEquals("1KB", tokenList.get(1).getText()); + Assert.assertEquals("1MB", tokenList.get(2).getText()); + Assert.assertEquals("1GB", tokenList.get(3).getText()); + Assert.assertEquals("1TB", tokenList.get(4).getText()); + Assert.assertEquals("1PB", tokenList.get(5).getText()); + } + + @Test + public void testTimeDurationTokens() { + String input = "1ns 1us 1ms 1s 1m 1h 1d"; + DirectivesLexer lexer = new DirectivesLexer(CharStreams.fromString(input)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + tokens.fill(); + List tokenList = tokens.getTokens(); + + // Remove EOF token + tokenList = tokenList.subList(0, tokenList.size() - 1); + + Assert.assertEquals(7, tokenList.size()); + for (Token token : tokenList) { + Assert.assertEquals(DirectivesLexer.TIME_DURATION, token.getType()); + } + + Assert.assertEquals("1ns", tokenList.get(0).getText()); + Assert.assertEquals("1us", tokenList.get(1).getText()); + Assert.assertEquals("1ms", tokenList.get(2).getText()); + Assert.assertEquals("1s", tokenList.get(3).getText()); + Assert.assertEquals("1m", tokenList.get(4).getText()); + 
Assert.assertEquals("1h", tokenList.get(5).getText()); + Assert.assertEquals("1d", tokenList.get(6).getText()); + } + + @Test + public void testInvalidByteSizeTokens() { + String input = "1XB 1YB"; + DirectivesLexer lexer = new DirectivesLexer(CharStreams.fromString(input)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + tokens.fill(); + List tokenList = tokens.getTokens(); + + // Remove EOF token + tokenList = tokenList.subList(0, tokenList.size() - 1); + + // These should be parsed as identifiers, not BYTE_SIZE tokens + for (Token token : tokenList) { + Assert.assertNotEquals(DirectivesLexer.BYTE_SIZE, token.getType()); + } + } + + @Test + public void testInvalidTimeDurationTokens() { + String input = "1xs 1ys"; + DirectivesLexer lexer = new DirectivesLexer(CharStreams.fromString(input)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + tokens.fill(); + List tokenList = tokens.getTokens(); + + // Remove EOF token + tokenList = tokenList.subList(0, tokenList.size() - 1); + + // These should be parsed as identifiers, not TIME_DURATION tokens + for (Token token : tokenList) { + Assert.assertNotEquals(DirectivesLexer.TIME_DURATION, token.getType()); + } + } +} \ No newline at end of file diff --git a/wrangler-core/src/test/java/io/cdap/wrangler/steps/transformation/AggregateStatsTest.java b/wrangler-core/src/test/java/io/cdap/wrangler/steps/transformation/AggregateStatsTest.java new file mode 100644 index 000000000..23f6773a0 --- /dev/null +++ b/wrangler-core/src/test/java/io/cdap/wrangler/steps/transformation/AggregateStatsTest.java @@ -0,0 +1,184 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.wrangler.steps.transformation; + +import io.cdap.wrangler.api.DirectiveContext; +import io.cdap.wrangler.api.DirectiveExecutionException; +import io.cdap.wrangler.api.DirectiveParseException; +import io.cdap.wrangler.api.ExecutorContext; +import io.cdap.wrangler.api.Row; +import io.cdap.wrangler.api.parser.ByteSize; +import io.cdap.wrangler.api.parser.TimeDuration; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.mockito.Mockito.when; + +/** + * Tests for the {@link AggregateStats} directive. 
+ */ +public class AggregateStatsTest { + + @Mock + private ExecutorContext executorContext; + + @Mock + private DirectiveContext directiveContext; + + private AggregateStats directive; + + @Before + public void setUp() throws Exception { + MockitoAnnotations.initMocks(this); + directive = new AggregateStats(); + directive.initialize(executorContext); + } + + @Test + public void testBasicAggregation() throws DirectiveParseException, DirectiveExecutionException { + // Configure the directive + Map arguments = new HashMap<>(); + arguments.put("column", "size"); + arguments.put("operation", "sum"); + directive.configure(arguments); + + // Create test data + List rows = new ArrayList<>(); + Row row1 = new Row(); + row1.add("size", new ByteSize(1024, "B", "1024B")); + rows.add(row1); + + Row row2 = new Row(); + row2.add("size", new ByteSize(1, "KB", "1KB")); + rows.add(row2); + + // Execute the directive + List result = directive.execute(rows, directiveContext); + + // Verify the result + Assert.assertEquals(2, result.size()); + ByteSize expectedSum = new ByteSize(2048, "B", "2048B"); + Assert.assertEquals(expectedSum, result.get(0).getValue("size_sum")); + Assert.assertEquals(expectedSum, result.get(1).getValue("size_sum")); + } + + @Test + public void testDifferentUnits() throws DirectiveParseException, DirectiveExecutionException { + // Configure the directive + Map arguments = new HashMap<>(); + arguments.put("column", "duration"); + arguments.put("operation", "avg"); + directive.configure(arguments); + + // Create test data + List rows = new ArrayList<>(); + Row row1 = new Row(); + row1.add("duration", new TimeDuration(1000, "ms", "1000ms")); + rows.add(row1); + + Row row2 = new Row(); + row2.add("duration", new TimeDuration(1, "s", "1s")); + rows.add(row2); + + // Execute the directive + List result = directive.execute(rows, directiveContext); + + // Verify the result + Assert.assertEquals(2, result.size()); + TimeDuration expectedAvg = new TimeDuration(1500, 
"ms", "1500ms"); + Assert.assertEquals(expectedAvg, result.get(0).getValue("duration_avg")); + Assert.assertEquals(expectedAvg, result.get(1).getValue("duration_avg")); + } + + @Test + public void testMixedUnits() throws DirectiveParseException, DirectiveExecutionException { + // Configure the directive + Map arguments = new HashMap<>(); + arguments.put("column", "size"); + arguments.put("operation", "max"); + directive.configure(arguments); + + // Create test data + List rows = new ArrayList<>(); + Row row1 = new Row(); + row1.add("size", new ByteSize(1024, "B", "1024B")); + rows.add(row1); + + Row row2 = new Row(); + row2.add("size", new ByteSize(1, "MB", "1MB")); + rows.add(row2); + + Row row3 = new Row(); + row3.add("size", new ByteSize(512, "KB", "512KB")); + rows.add(row3); + + // Execute the directive + List result = directive.execute(rows, directiveContext); + + // Verify the result + Assert.assertEquals(3, result.size()); + ByteSize expectedMax = new ByteSize(1, "MB", "1MB"); + Assert.assertEquals(expectedMax, result.get(0).getValue("size_max")); + Assert.assertEquals(expectedMax, result.get(1).getValue("size_max")); + Assert.assertEquals(expectedMax, result.get(2).getValue("size_max")); + } + + @Test(expected = DirectiveExecutionException.class) + public void testInvalidSizeFormat() throws DirectiveParseException, DirectiveExecutionException { + // Configure the directive + Map arguments = new HashMap<>(); + arguments.put("column", "size"); + arguments.put("operation", "sum"); + directive.configure(arguments); + + // Create test data with invalid format + List rows = new ArrayList<>(); + Row row1 = new Row(); + row1.add("size", "invalid"); + rows.add(row1); + + // Execute the directive - should throw an exception + directive.execute(rows, directiveContext); + } + + @Test(expected = DirectiveExecutionException.class) + public void testInvalidTimeFormat() throws DirectiveParseException, DirectiveExecutionException { + // Configure the directive + Map 
arguments = new HashMap<>(); + arguments.put("column", "duration"); + arguments.put("operation", "sum"); + directive.configure(arguments); + + // Create test data with invalid format + List rows = new ArrayList<>(); + Row row1 = new Row(); + row1.add("duration", "invalid"); + rows.add(row1); + + // Execute the directive - should throw an exception + directive.execute(rows, directiveContext); + } +} \ No newline at end of file