shards
This commit is contained in:
@@ -95,7 +95,7 @@ if confirmed:
|
|||||||
confirmed_dataset = confirmed_dataset.map(extract_minimal, remove_columns=confirmed_dataset.column_names)
|
confirmed_dataset = confirmed_dataset.map(extract_minimal, remove_columns=confirmed_dataset.column_names)
|
||||||
|
|
||||||
# Sharding parameters
|
# Sharding parameters
|
||||||
num_shards = min(1, len(confirmed)) # Don't create more shards than samples
|
num_shards = min(25, len(confirmed)) # Don't create more shards than samples
|
||||||
shard_size = len(confirmed_dataset) // num_shards + 1
|
shard_size = len(confirmed_dataset) // num_shards + 1
|
||||||
|
|
||||||
# Write each shard separately
|
# Write each shard separately
|
||||||
|
|||||||
Reference in New Issue
Block a user