I want to iterate over each image in turn. What is the best way to do this and to access each of the images when they are stored in different folders?


# Collect the path of every image below dataset_path together with its label.
# The label of an image is the name of the top-level sub-folder of
# dataset_path that contains it.
images = []  # path of each image file
labels = []  # label of the image stored at the same index in `images`

for folder, _, file_names in os.walk(dataset_path):
    relative_folder = os.path.relpath(folder, dataset_path)
    if relative_folder == os.curdir:
        # Files lying directly in dataset_path belong to no label folder.
        continue

    # Derive the label from the path itself instead of slicing at a fixed
    # character offset, which only works for one specific dataset_path
    # length and otherwise produces folder names that do not exist.
    label = relative_folder.split(os.sep)[0]

    # os.walk already visits every sub-folder exactly once, so no nested
    # walk is needed. Sorting keeps the order reproducible between runs.
    for image_file in sorted(file_names):
        images.append(os.path.join(folder, image_file))
        labels.append(label)

I tried it this way and it partly worked, but while iterating over the images it raises an error because one of the folders cannot be found.

# Visit every entry that lies directly inside label_folder.
# NOTE(review): the indentation of this snippet was lost and the final `if`
# has no body, so it cannot run as written. The action to take for each
# file still has to be filled in.
for filename in os.listdir(label_folder):

f = os.path.join(label_folder, filename)

# Check whether the joined path refers to an existing regular file.

if os.path.isfile(f):


# Build pandas frames from the collected image paths and labels.
# The dictionary keys use plain ASCII quotes; typographic quotes are not
# valid Python string delimiters.

# Frame holding only the image paths, transposed so that it has a single
# "image_path" row and one column per image.
d = pd.DataFrame.from_dict({'image_path': images}).T

# Frame holding paths and labels, transposed so that "image_path" and
# "label" are the two rows and every image is one column.
# (A label-only frame that was overwritten before being used is omitted.)
data = pd.DataFrame.from_dict({'image_path': images, 'label': labels}).T

# Stack both frames row-wise.
# NOTE(review): the result contains the "image_path" row twice and keeps one
# column per image. Confirm that this layout is intended; the untransposed
# `data` frame (one row per image) may be what later steps expect.
frames = [data, d]
result = pd.concat(frames)


from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D

# We need to define custom features.

# Schema of the encoded dataset: one entry per field, giving its type and,
# for the array fields, its fixed shape.

# ClassLabel needs each class name exactly once, while `labels` may hold one
# entry per image. Drop the duplicates but keep the first-seen order.
class_names = list(dict.fromkeys(labels))

features = Features({
    'image': Array3D(dtype="int64", shape=(3, 224, 224)),
    'input_ids': Sequence(feature=Value(dtype='int64')),
    'attention_mask': Sequence(Value(dtype='int64')),
    'token_type_ids': Sequence(Value(dtype='int64')),
    'bbox': Array2D(dtype="int64", shape=(512, 4)),
    'labels': ClassLabel(num_classes=len(class_names), names=class_names),
})


def preprocess_data(examples):
    """Encode one batch of examples.

    Relies on ``Image``, ``processor`` and ``label2id`` being defined outside
    this function.

    Args:
        examples: Batch with an ``image_path`` entry (image file paths) and a
            ``label`` entry (label of each image), both iterable.

    Returns:
        The result of calling ``processor`` on the loaded images, with an
        added ``labels`` entry that holds ``label2id[label]`` for every
        label of the batch.
    """
    # Take a batch of images. Each file is closed as soon as its RGB copy
    # has been created, so no file handles stay open.
    images = []
    for path in examples['image_path']:
        with Image.open(path) as image:
            images.append(image.convert("RGB"))

    encoded_inputs = processor(images, padding="max_length", truncation=True)

    # Add labels.
    encoded_inputs["labels"] = [label2id[label] for label in examples["label"]]

    return encoded_inputs

# Run preprocess_data over the dataset in batches of two examples and drop
# the columns the dataset had before the call.
encoded_dataset = dataset.map(
    preprocess_data,
    batched=True,
    batch_size=2,
    remove_columns=dataset.column_names,
)


