Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,7 @@ website/node_modules
website/i18n/*
website/.yarn/install-state.gz
docs/packages/*/api

# developer tools
.vscode/*
!.vscode/extensions.json
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ Any manual changes will be lost next time this gets auto-generated. -->

- [file_reader_api](./docs/asset/apis/file_reader_api)
- [file_sender_api](./docs/asset/apis/file_sender_api)
- hdfs_reader_api
- hdfs_sender_api
- [s3_reader_api](./docs/asset/apis/s3_reader_api)
- [s3_sender_api](./docs/asset/apis/s3_sender_api)

Expand All @@ -36,8 +34,6 @@ Any manual changes will be lost next time this gets auto-generated. -->

- [file_exporter](./docs/asset/operations/file_exporter)
- [file_reader](./docs/asset/operations/file_reader)
- hdfs_append
- [hdfs_reader](./docs/asset/operations/hdfs_reader)
- [s3_exporter](./docs/asset/operations/s3_exporter)
- [s3_reader](./docs/asset/operations/s3_reader)

Expand Down
2 changes: 1 addition & 1 deletion asset/asset.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "file",
"version": "3.3.0",
"version": "4.0.0-dev.0",
"description": "A set of processors for working with files",
"minimum_teraslice_version": "2.0.0"
}
7 changes: 4 additions & 3 deletions asset/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "file",
"displayName": "Asset",
"version": "3.3.0",
"version": "4.0.0-dev.0",
"private": true,
"description": "A set of processors for working with files",
"repository": {
Expand All @@ -21,8 +21,9 @@
"test": "yarn --cwd ../ test"
},
"dependencies": {
"@terascope/file-asset-apis": "~1.1.2",
"@terascope/job-components": "~1.12.4",
"@terascope/core-utils": "~2.0.0-dev.12",
"@terascope/file-asset-apis": "~2.0.0-dev.0",
"@terascope/job-components": "~2.0.0-dev.13",
"csvtojson": "~2.0.14",
"fs-extra": "~11.3.2",
"json2csv": "5.0.7",
Expand Down
18 changes: 9 additions & 9 deletions asset/src/__lib/common-schema.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { isNumber, joinList } from '@terascope/job-components';
import { isNumber, joinList } from '@terascope/core-utils';
import { Compression, Format } from '@terascope/file-asset-apis';

const formats = Object.values(Format);
Expand All @@ -25,10 +25,10 @@ const readerSchema = {
default: {},
format: Object
},
connection: {
_connection: {
doc: 'The connection from Terafoundation to use',
default: 'default',
format: 'optional_String'
format: 'optional_string'
},
};

Expand All @@ -39,12 +39,12 @@ export const commonSchema = {
+ 'be treated as a file prefix.\ni.e. "/data/export_" will result in files like'
+ ' "/data/export_hs897f.1079.gz"',
default: null,
format: 'required_String'
format: 'required_string'
},
extension: {
doc: 'A file extension override, by default an extension will be added to the file based on the format and compression settings',
default: null,
format: 'optional_String'
format: 'optional_string'
},
compression: {
doc: 'Compression to use on the object. Supports lz4 and gzip.',
Expand All @@ -54,12 +54,12 @@ export const commonSchema = {
field_delimiter: {
doc: 'Delimiter character between record fields. Only used with `csv` format',
default: null,
format: 'optional_String'
format: 'optional_string'
},
line_delimiter: {
doc: 'Line delimiter character for the object',
default: null,
format: 'optional_String'
format: 'optional_string'
},
fields: {
doc: 'Fields to include in the output',
Expand Down Expand Up @@ -93,9 +93,9 @@ export const commonSchema = {
export const fileReaderSchema = Object.assign({}, commonSchema, readerSchema);

export const opSchema = {
api_name: {
_api_name: {
doc: 'name of api to be used by operation',
default: null,
format: 'optional_String'
format: 'required_string'
}
};
5 changes: 2 additions & 3 deletions asset/src/file_exporter/interfaces.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { OpConfig } from '@terascope/job-components';
import { ChunkedFileSenderConfig } from '@terascope/file-asset-apis';

export interface FileExporterConfig extends ChunkedFileSenderConfig, OpConfig {
api_name: string;
export interface FileExporterConfig extends OpConfig {
_api_name: string;
}
5 changes: 3 additions & 2 deletions asset/src/file_exporter/processor.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { BatchProcessor, DataEntity } from '@terascope/job-components';
import { DataEntity } from '@terascope/core-utils';
import { BatchProcessor } from '@terascope/job-components';
import { FileSender } from '@terascope/file-asset-apis';
import { FileExporterConfig } from './interfaces.js';
import { FileSenderFactoryAPI } from '../file_sender_api/interfaces.js';
Expand All @@ -8,7 +9,7 @@ export default class FileBatcher extends BatchProcessor<FileExporterConfig> {

async initialize(): Promise<void> {
await super.initialize();
const apiName = this.opConfig.api_name;
const apiName = this.opConfig._api_name;
const apiManager = this.getAPI<FileSenderFactoryAPI>(apiName);
// this processor does not allow dynamic routing, use routed-sender operation instead
this.api = await apiManager.create(apiName, { dynamic_routing: false });
Expand Down
28 changes: 1 addition & 27 deletions asset/src/file_exporter/schema.ts
Original file line number Diff line number Diff line change
@@ -1,34 +1,8 @@
import { ConvictSchema, ValidatedJobConfig } from '@terascope/job-components';
import { ConvictSchema } from '@terascope/job-components';
import { FileExporterConfig } from './interfaces.js';
import { opSchema } from '../__lib/common-schema.js';
import { DEFAULT_API_NAME } from '../file_sender_api/interfaces.js';

export default class Schema extends ConvictSchema<FileExporterConfig> {
validateJob(job: ValidatedJobConfig): void {
let opIndex = 0;

const opConfig = job.operations.find((op, ind) => {
if (op._op === 'file_exporter') {
opIndex = ind;
return op;
}
return false;
});

if (opConfig == null) throw new Error('Could not find file_exporter operation in jobConfig');

const {
api_name, ...newConfig
} = opConfig;

const apiName = api_name || `${DEFAULT_API_NAME}:${opConfig._op}-${opIndex}`;

// we set the new apiName back on the opConfig so it can reference the unique name
opConfig.api_name = apiName;

this.ensureAPIFromConfig(apiName, job, newConfig);
}

build(): Record<string, any> {
return opSchema;
}
Expand Down
5 changes: 3 additions & 2 deletions asset/src/file_reader/fetcher.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Fetcher, DataEntity } from '@terascope/job-components';
import { DataEntity } from '@terascope/core-utils';
import { Fetcher } from '@terascope/job-components';
import { FileTerasliceAPI, FileSlice } from '@terascope/file-asset-apis';
import { FileReaderConfig } from './interfaces.js';
import { FileReaderFactoryAPI } from '../file_reader_api/interfaces.js';
Expand All @@ -8,7 +9,7 @@ export default class FileFetcher extends Fetcher<FileReaderConfig> {

async initialize(): Promise<void> {
await super.initialize();
const apiName = this.opConfig.api_name;
const apiName = this.opConfig._api_name;
const apiManager = this.getAPI<FileReaderFactoryAPI>(apiName);
this.api = await apiManager.create(apiName, {});
}
Expand Down
5 changes: 2 additions & 3 deletions asset/src/file_reader/interfaces.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { OpConfig } from '@terascope/job-components';
import { ReaderConfig } from '@terascope/file-asset-apis';

export interface FileReaderConfig extends ReaderConfig, OpConfig {
api_name: string;
export interface FileReaderConfig extends OpConfig {
_api_name: string;
}
28 changes: 1 addition & 27 deletions asset/src/file_reader/schema.ts
Original file line number Diff line number Diff line change
@@ -1,34 +1,8 @@
import { ConvictSchema, ValidatedJobConfig } from '@terascope/job-components';
import { ConvictSchema } from '@terascope/job-components';
import { FileReaderConfig } from './interfaces.js';
import { opSchema } from '../__lib/common-schema.js';
import { DEFAULT_API_NAME } from '../file_reader_api/interfaces.js';

export default class Schema extends ConvictSchema<FileReaderConfig> {
validateJob(job: ValidatedJobConfig): void {
let opIndex = 0;

const opConfig = job.operations.find((op, ind) => {
if (op._op === 'file_reader') {
opIndex = ind;
return op;
}
return false;
});

if (opConfig == null) throw new Error('Could not find file_reader operation in jobConfig');

const {
api_name, ...newConfig
} = opConfig;

const apiName = api_name || `${DEFAULT_API_NAME}:${opConfig._op}-${opIndex}`;

// we set the new apiName back on the opConfig so it can reference the unique name
opConfig.api_name = apiName;

this.ensureAPIFromConfig(apiName, job, newConfig);
}

build(): Record<string, any> {
return opSchema;
}
Expand Down
5 changes: 3 additions & 2 deletions asset/src/file_reader/slicer.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { Slicer, SlicerRecoveryData } from '@terascope/job-components';
import { FileSlice } from '@terascope/file-asset-apis';
import { FileReaderFactoryAPI, FileReaderAPIConfig } from '../file_reader_api/interfaces.js';
import { FileReaderConfig } from './interfaces.js';

export default class FileSlicerOperation extends Slicer {
export default class FileSlicerOperation extends Slicer<FileReaderConfig> {
slicer!: () => Promise<FileSlice[] | null>;

/**
Expand All @@ -17,7 +18,7 @@ export default class FileSlicerOperation extends Slicer {
async initialize(recoveryData: SlicerRecoveryData[]): Promise<void> {
await super.initialize(recoveryData);

const apiName = this.opConfig.api_name as string;
const apiName = this.opConfig._api_name;
const apiManager = this.getAPI<FileReaderFactoryAPI>(apiName);
const api = await apiManager.create(apiName, {});
const apiConfig = apiManager.getConfig(apiName) as FileReaderAPIConfig;
Expand Down
7 changes: 3 additions & 4 deletions asset/src/file_reader_api/api.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import {
APIFactory, AnyObject, isNil, isString, getTypeOf
} from '@terascope/job-components';
import { isNil, isString, getTypeOf } from '@terascope/core-utils';
import { APIFactory } from '@terascope/job-components';
import { FileTerasliceAPI } from '@terascope/file-asset-apis';
import { FileReaderAPIConfig } from './interfaces.js';

export default class FileReaderAPI extends APIFactory<FileTerasliceAPI, FileReaderAPIConfig> {
validateConfig(input: AnyObject): FileReaderAPIConfig {
validateConfig(input: Record<string, any>): FileReaderAPIConfig {
if (isNil(input.path) || !isString(input.path)) throw new Error(`Invalid parameter path: it must be of type string, was given ${getTypeOf(input.path)}`);
// file_per_slice must be set to true if compression is set to anything besides "none"
if (input.compression !== 'none' && input.file_per_slice !== true) {
Expand Down
4 changes: 2 additions & 2 deletions asset/src/file_reader_api/interfaces.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { APIConfig, APIFactoryRegistry } from '@terascope/job-components';
import { FileTerasliceAPI, ReaderConfig } from '@terascope/file-asset-apis';
import { FileTerasliceAPI, ReaderAPIConfig } from '@terascope/file-asset-apis';

export const DEFAULT_API_NAME = 'file_reader_api';

export interface FileReaderAPIConfig extends ReaderConfig, APIConfig {}
export interface FileReaderAPIConfig extends ReaderAPIConfig, APIConfig {}

export type FileReaderFactoryAPI = APIFactoryRegistry<FileTerasliceAPI, FileReaderAPIConfig>;
3 changes: 2 additions & 1 deletion asset/src/file_reader_api/schema.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { ConvictSchema, cloneDeep } from '@terascope/job-components';
import { cloneDeep } from '@terascope/core-utils';
import { ConvictSchema } from '@terascope/job-components';
import { fileReaderSchema } from '../__lib/common-schema.js';
import { FileReaderAPIConfig } from './interfaces.js';

Expand Down
8 changes: 4 additions & 4 deletions asset/src/file_sender_api/api.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import {
APIFactory, AnyObject, isNil,
isString, getTypeOf, toString,
isNil, isString, getTypeOf, toString,
get
} from '@terascope/job-components';
} from '@terascope/core-utils';
import { APIFactory } from '@terascope/job-components';
import { FileSender } from '@terascope/file-asset-apis';
import { FileSenderAPIConfig } from './interfaces.js';

export default class FileSenderAPI extends APIFactory<FileSender, FileSenderAPIConfig> {
validateConfig(input: AnyObject): FileSenderAPIConfig {
validateConfig(input: Record<string, any>): FileSenderAPIConfig {
if (isNil(input.path) || !isString(input.path)) throw new Error(`Invalid parameter path: it must be of type string, was given ${getTypeOf(input.path)}`);
const workerId = toString(get(this.context, 'cluster.worker.id'));
input.id = workerId;
Expand Down
4 changes: 2 additions & 2 deletions asset/src/file_sender_api/interfaces.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { APIConfig, APIFactoryRegistry } from '@terascope/job-components';
import { FileSender, ChunkedFileSenderConfig } from '@terascope/file-asset-apis';
import { FileSender, ChunkedFileSenderAPIConfig } from '@terascope/file-asset-apis';

export const DEFAULT_API_NAME = 'file_sender_api';

export interface FileSenderAPIConfig extends ChunkedFileSenderConfig, APIConfig {}
export interface FileSenderAPIConfig extends ChunkedFileSenderAPIConfig, APIConfig {}

export type FileSenderFactoryAPI = APIFactoryRegistry<FileSender, FileSenderAPIConfig>;
5 changes: 3 additions & 2 deletions asset/src/file_sender_api/schema.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { ConvictSchema, cloneDeep } from '@terascope/job-components';
import { cloneDeep } from '@terascope/core-utils';
import { ConvictSchema } from '@terascope/job-components';
import { FileSenderAPIConfig } from './interfaces.js';
import { commonSchema } from '../__lib/common-schema.js';

const apiSchema = cloneDeep(commonSchema);
apiSchema.path.format = 'required_String';
apiSchema.path.format = 'required_string';

export default class Schema extends ConvictSchema<FileSenderAPIConfig> {
build(): Record<string, any> {
Expand Down
4 changes: 0 additions & 4 deletions asset/src/hdfs_append/interfaces.ts

This file was deleted.

21 changes: 0 additions & 21 deletions asset/src/hdfs_append/processor.ts

This file was deleted.

35 changes: 0 additions & 35 deletions asset/src/hdfs_append/schema.ts

This file was deleted.

Loading