# 5. Upload Files

<mark style="color:green;">`POST`</mark> `/uploadFiles`

This endpoint enables users to upload files to a specified extraction. If a `batchId` is included in the request, the files will be added to that specific existing batch on the platform. It is important to ensure that the `batchId` already exists; otherwise, the upload will not be successful. If no `batchId` is provided in the request, a new batch will automatically be created for the files.

Files must be uploaded using the `multipart/form-data` content type, which is suitable for uploading binary files (like documents and images).

## Postman Collection

For a complete and interactive set of API requests, please refer to our [Postman Integration](/data-extraction-api/postman-integration.md) collection.

## Server URL

```
https://api.extracta.ai/api/v1
```

## Headers

| Name          | Value                 |
| ------------- | --------------------- |
| Content-Type  | `multipart/form-data` |
| Authorization | `Bearer <token>`      |

## Body

<table><thead><tr><th width="251">Name</th><th width="119">Type</th><th width="115">Required</th><th>Description</th></tr></thead><tbody><tr><td><code>extractionId</code></td><td>string</td><td><code>true</code></td><td>Unique identifier for the extraction.</td></tr><tr><td><code>batchId</code></td><td>string</td><td><code>false</code></td><td>The ID of the batch to add files to.</td></tr><tr><td><code>files</code></td><td>multipart</td><td><code>true</code></td><td>The file(s) to upload. See <a data-mention href="/pages/GEp82eoMVXcJO7dA0Z4j">Supported File Types</a>.</td></tr></tbody></table>

For a seamless extraction process, please ensure your documents are in one of our supported formats. Check our Supported File Types page for a list of all formats we currently accept and additional details to prepare your files accordingly.

{% content-ref url="/pages/GEp82eoMVXcJO7dA0Z4j" %}
[Supported File Types](/pages/GEp82eoMVXcJO7dA0Z4j)
{% endcontent-ref %}

## Code Example

**Note for PHP Users:** Currently, the `/uploadFiles` endpoint supports uploading only one file per request. Please ensure you submit individual requests for each file you need to upload.

{% tabs %}
{% tab title="JavaScript" %}

```javascript
const fs = require('fs');
const axios = require('axios');
const FormData = require('form-data');

/**
 * Uploads files to the Extracta API using Axios for making HTTP requests.
 *
 * @param {string} token - The authorization token to access the API.
 * @param {string} extractionId - The ID of the extraction process to which these files belong.
 * @param {Array.<string>} files - Paths to the files to be uploaded.
 * @param {string} [batchId=null] - Optional batch ID if the files belong to a specific batch.
 * @returns {Promise<Object>} The promise that resolves to the parsed body of the API response.
 * @throws {Object} The body of the API's error response, when the failed request carries a response.
 * @throws {Error} When the request failed without a response; the underlying error is kept as `cause`.
 */
async function uploadFiles(token, extractionId, files, batchId = null) {
    const url = "https://api.extracta.ai/api/v1/uploadFiles";
    const formData = new FormData();

    formData.append('extractionId', extractionId);
    // batchId is optional: it is only sent when the caller provides one
    if (batchId) {
        formData.append('batchId', batchId);
    }

    // Every file is appended under the same 'files' field name
    for (const file of files) {
        formData.append('files', fs.createReadStream(file));
    }

    try {
        const response = await axios.post(url, formData, {
            headers: {
                // getHeaders() supplies the multipart Content-Type, including the boundary
                ...formData.getHeaders(),
                'Authorization': `Bearer ${token}`
            },
        });

        return response.data;
    } catch (error) {
        // A response is attached to the error: surface its body to the caller
        if (error.response) {
            throw error.response.data;
        }
        // No response at all: keep the root cause instead of hiding it behind a generic message
        throw new Error('An unknown error occurred', { cause: error });
    }
}

/**
 * Example entry point: uploads two sample images with placeholder
 * credentials and logs either the API response or the failure.
 */
async function main() {
    const apiKey = 'apiKey';
    const targetExtraction = 'extractionId';
    const samplePaths = ['test_1.png', 'test_2.png'];

    try {
        // No batchId is passed here
        const result = await uploadFiles(apiKey, targetExtraction, samplePaths);
        console.log(result);
    } catch (failure) {
        console.error("Failed to upload files:", failure);
    }
}

main();
```

{% endtab %}

{% tab title="Python" %}

```python
import mimetypes
from contextlib import ExitStack

import requests

def upload_files(token, extraction_id, files, batch_id=None):
    """Upload one or more files to an Extracta extraction.

    Args:
        token: API key, sent as a Bearer token in the Authorization header.
        extraction_id: ID of the extraction the files belong to.
        files: Iterable of paths to the files to upload.
        batch_id: Optional batch ID; it is only sent when not None.

    Returns:
        The parsed JSON response on success, or None when the request
        failed (the failure is printed).

    Raises:
        OSError: If one of the files cannot be opened.
    """
    url = "https://api.extracta.ai/api/v1/uploadFiles"
    headers = {"Authorization": f"Bearer {token}"}

    payload = {"extractionId": extraction_id}
    if batch_id is not None:
        payload["batchId"] = batch_id

    # ExitStack closes every opened file once the request is done, even on failure
    with ExitStack() as stack:
        # Prepare the files for uploading; all parts share the "files" field name
        file_streams = [
            (
                "files",
                (
                    file,
                    stack.enter_context(open(file, "rb")),
                    mimetypes.guess_type(file)[0] or "application/octet-stream",
                ),
            )
            for file in files
        ]

        try:
            response = requests.post(url, files=file_streams, data=payload, headers=headers)
            response.raise_for_status()  # Raises requests.HTTPError for 4xx/5xx status codes
            return response.json()  # Returns the JSON response if no error
        except requests.HTTPError as e:
            # Prefer the server's own error body, but do not assume it is valid JSON
            error_message = None
            if e.response is not None:
                try:
                    error_message = e.response.json()
                except ValueError:
                    error_message = e.response.text
            if error_message:
                print(f"Server returned an error: {error_message}")
            else:
                print(f"HTTP error occurred: {e}")
        except requests.RequestException as e:
            # Handle other requests exceptions
            print(f"Failed to upload files: {e}")
        except Exception as e:
            # Handle other possible exceptions
            print(f"An unexpected error occurred: {e}")
    return None

# Example usage: replace the placeholder values with your own API key,
# extraction ID and file paths before running.
if __name__ == "__main__":
    api_key = 'apiKey'
    target_extraction = 'extractionId'
    sample_paths = ['test_1.png', 'test_2.png']

    try:
        # No batch ID is passed here
        print(upload_files(api_key, target_extraction, sample_paths))
    except Exception as exc:
        print(f"Failed to upload files: {exc}")

```

{% endtab %}

{% tab title="PHP" %}

```php
<?php

/**
 * Uploads a single file to the Extracta API using cURL for making HTTP requests.
 *
 * @param string $token The authorization token to access the API.
 * @param string $extractionId The ID of the extraction process to which this file belongs.
 * @param string $filePath Path to the file to be uploaded.
 * @param string|null $batchId Optional batch ID if the file belongs to a specific batch.
 * @return mixed The raw response body from the API, or a string starting with "Error: " on failure.
 */
function uploadFiles($token, $extractionId, $filePath, $batchId = null) {
    $url = 'https://api.extracta.ai/api/v1/uploadFiles';

    // Fail early with a clear message instead of a low-level cURL read error
    if (!is_file($filePath) || !is_readable($filePath)) {
        return 'Error: File not found or not readable: ' . $filePath;
    }

    // Detect the actual MIME type rather than labelling every upload as image/jpeg
    $mimeType = function_exists('mime_content_type') ? mime_content_type($filePath) : false;
    if ($mimeType === false) {
        $mimeType = 'application/octet-stream';
    }

    // Initialize cURL session
    $ch = curl_init($url);

    // Prepare the payload
    $payload = [
        'extractionId' => $extractionId,
        'files' => new CURLFile($filePath, $mimeType, basename($filePath))
    ];

    // batchId is optional: it is only sent when the caller provides one
    if ($batchId !== null) {
        $payload['batchId'] = $batchId;
    }

    // Set cURL options
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Authorization: Bearer ' . $token,
        'Content-Type: multipart/form-data'
    ]);
    // Return the response body from curl_exec() instead of printing it
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    try {
        // Execute cURL session
        $response = curl_exec($ch);

        // Check for cURL errors
        if (curl_errno($ch)) {
            throw new Exception('Curl error: ' . curl_error($ch));
        }

        // Here, you could additionally parse the $response if it's in JSON format or if needed,
        // For simplicity, just returning the raw response for now
        return $response;
    } catch (Exception $e) {
        // Transport failures are reported as a string, not rethrown
        return 'Error: ' . $e->getMessage();
    } finally {
        // Always close the cURL session
        curl_close($ch);
    }
}

// Example usage: replace the placeholder values with your own API key,
// extraction ID and file path before running.
$token = 'apiKey';
$extractionId = 'extractionId';
$filePath = './test_1.png';
$batchId = null; // null means no batchId is sent with the request

try {
    $response = uploadFiles($token, $extractionId, $filePath, $batchId);
    echo $response;
} catch (Exception $e) {
    echo "Failed to upload file: " . $e->getMessage();
}

?>
```

{% endtab %}
{% endtabs %}

## Responses

{% tabs %}
{% tab title="200" %}

```json
{
    "status": "uploaded",
    "extractionId": "extractionId",
    "batchId": "batchId",
    "files": [
        {
            "fileId": "fileId",
            "fileName": "fileName",
            "numberOfPages": 1,
            "url": "url"
        }
    ]
}
```

{% endtab %}

{% tab title="400" %}

```json
{
    "status": "error",
    "message": "Extraction does not exist",
    "extractionId": "extractionId"
}
```

{% endtab %}

{% tab title="500" %}

```json
{
    "status": "error",
    "message": "Error uploading files"
}
```

{% endtab %}
{% endtabs %}


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://docs.extracta.ai/data-extraction-api/api-endpoints-data-extraction/5.-upload-files.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
