# 1. Create extraction

<mark style="color:green;">`POST`</mark> `/createExtraction`

Initiates a new document extraction process. This endpoint allows you to **define** an extraction with specific fields, options, and configurations. Once **created**, you can use the returned `extractionId` to upload files for processing.

## Postman Collection

For a complete and interactive set of API requests, please refer to our [postman-integration](https://docs.extracta.ai/data-extraction-api/postman-integration "mention") collection.

## Server URL

```
https://api.extracta.ai/api/v1
```

## Headers

| Name          | Value              |
| ------------- | ------------------ |
| Content-Type  | `application/json` |
| Authorization | `Bearer <token>`   |

## Body

<table><thead><tr><th width="174">Name</th><th width="101">Type</th><th width="104">Required</th><th>Description</th><th>Dedicated page</th></tr></thead><tbody><tr><td><code>name</code></td><td>string</td><td><code>true</code></td><td>A descriptive name for the extraction.</td><td></td></tr><tr><td><code>description</code></td><td>string</td><td><code>false</code></td><td>A description for the extraction.</td><td></td></tr><tr><td><code>language</code></td><td>string</td><td><code>true</code></td><td>Document's language for accurate extraction.</td><td><a data-mention href="../extraction-details/supported-languages">supported-languages</a></td></tr><tr><td><code>options</code></td><td>object</td><td><code>false</code></td><td>Additional processing options.</td><td><a data-mention href="../extraction-details/options">options</a></td></tr><tr><td><code>fields</code></td><td>array</td><td><code>true</code></td><td>An array of objects, each specifying a field to extract.</td><td><a data-mention href="../extraction-details/fields">fields</a></td></tr></tbody></table>

To fully customize your data extraction request, understanding the `fields` parameter is crucial. This parameter allows you to specify exactly what information you want to extract, with options for `string`, `object`, and `array` types to match your data structure needs.

{% content-ref url="../extraction-details/fields" %}
[fields](https://docs.extracta.ai/data-extraction-api/extraction-details/fields)
{% endcontent-ref %}

Customize your extraction process with additional options such as table analysis and handwritten text recognition.

{% content-ref url="../extraction-details/options" %}
[options](https://docs.extracta.ai/data-extraction-api/extraction-details/options)
{% endcontent-ref %}

## Body Example

```json
{
    "extractionDetails": { 
        "name": "CVs Extraction",
        "description": "...",
        "language": "English",
        "options": {
            "hasTable": false,
            "hasVisuals": false,
            "handwrittenTextRecognition": false,
            "checkboxRecognition": false
        },
        "fields": [
            {
                "description": "",
                "example": "",
                "key": "name"
            },
            {
                "description": "",
                "example": "",
                "key": "surname"
            },
            {
                "description": "",
                "example": "",
                "key": "phone_number"
            },
            {
                "description": "last job title name",
                "example": "Programmer",
                "key": "last_job_position"
            },
            {
                "description": "the number of years in numbers",
                "example": "6",
                "key": "years_of_experience"
            }
        ]
    }
}

```

## Code Example

{% tabs %}
{% tab title="JavaScript" %}

```javascript
const axios = require('axios');

/**
 * Creates a new extraction by POSTing its definition to the `/createExtraction` endpoint.
 *
 * The details are sent wrapped as `{ extractionDetails: ... }`, matching the
 * documented request body.
 *
 * @param {string} token - The authorization token for API access (sent as a Bearer token).
 * @param {Object} extractionDetails - The details of the extraction to be created.
 * @returns {Promise<Object>} Resolves to the response body exposed by axios as `response.data`.
 * @throws {Object} The error response body (`error.response.data`) when the request
 *   failed with a response attached.
 * @throws {Error} The original request error when no response is attached
 *   (e.g. network failure or timeout), so the root cause is not lost.
 */
async function createExtraction(token, extractionDetails) {
    const url = "https://api.extracta.ai/api/v1/createExtraction";

    try {
        const response = await axios.post(url, {
            extractionDetails
        }, {
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${token}`
            }
        });

        return response.data;
    } catch (error) {
        // A response is attached: surface its body to the caller.
        if (error && error.response) {
            throw error.response.data;
        }

        // No response attached: rethrow the original error instead of masking
        // it behind a generic message, so the real cause stays visible.
        throw error;
    }
}

/**
 * Example entry point: builds a sample CV extraction definition, submits it
 * through createExtraction, and logs either the response or the failure.
 */
async function main() {
    const apiToken = 'apiKey';

    // One entry per value to pull out of each document.
    const cvFields = [
        {
            description: "",
            example: "",
            key: "name",
        },
        {
            description: "",
            example: "",
            key: "surname",
        },
        {
            description: "",
            example: "",
            key: "phone_number",
        },
        {
            description: "last job title name",
            example: "Programmer",
            key: "last_job_position",
        },
        {
            description: "the number of years in numbers",
            example: "6",
            key: "years_of_experience",
        },
    ];

    const details = {
        name: "CVs Extraction",
        description: "...",
        language: "English",
        options: {
            hasTable: false,
            handwrittenTextRecognition: false,
        },
        fields: cvFields,
    };

    try {
        const created = await createExtraction(apiToken, details);
        console.log("New Extraction Created:", created);
    } catch (failure) {
        console.error("Failed to create new extraction:", failure);
    }
}

main();
```

{% endtab %}

{% tab title="Python" %}

```python
import requests


def create_extraction(token, extraction_details, timeout=30):
    """Create a new extraction via the ``/createExtraction`` endpoint.

    Args:
        token: API key, sent as a ``Bearer`` token in the Authorization header.
        extraction_details: Either the bare extraction definition (``name``,
            ``language``, ``fields``, ...) or the same definition already
            wrapped as ``{"extractionDetails": {...}}``. A bare definition is
            wrapped before sending, so both shapes produce the same request body.
        timeout: Value passed to ``requests.post`` as ``timeout`` so a stalled
            connection cannot block the caller indefinitely. Defaults to 30.

    Returns:
        The result of ``response.json()`` on success, or ``None`` when the
        request raised a ``requests.RequestException`` (the error is printed).
    """
    url = "https://api.extracta.ai/api/v1/createExtraction"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}

    # The request body nests the definition under "extractionDetails";
    # accept an already-wrapped dict unchanged for backward compatibility.
    if "extractionDetails" in extraction_details:
        payload = extraction_details
    else:
        payload = {"extractionDetails": extraction_details}

    try:
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError if the response status code is 4XX/5XX
        return response.json()  # Returns the parsed JSON response
    except requests.RequestException as e:
        # Handles any requests-related errors
        print(e)
        return None


# Example usage
if __name__ == "__main__":
    token = "apiKey"
    # Request body: the extraction definition nested under "extractionDetails".
    extraction_details = {
        "extractionDetails": {
            "name": "CVs Extraction",
            "description": "...",
            "language": "English",
            "options": {"hasTable": False, "handwrittenTextRecognition": False},
            "fields": [
                {"description": "", "example": "", "key": "name"},
                {"description": "", "example": "", "key": "surname"},
                {"description": "", "example": "", "key": "phone_number"},
                {
                    "description": "last job title name",
                    "example": "Programmer",
                    "key": "last_job_position",
                },
                {
                    "description": "the number of years in numbers",
                    "example": "6",
                    "key": "years_of_experience",
                },
            ],
        }
    }

    response = create_extraction(token, extraction_details)
    # create_extraction returns None after printing the error, so only
    # announce success when a response actually came back.
    if response is None:
        print("Failed to create new extraction")
    else:
        print("New Extraction Created:", response)
```

{% endtab %}

{% tab title="PHP" %}

```php
<?php

/**
 * Initiates a new document extraction process with the provided details.
 *
 * The details are JSON-encoded as {"extractionDetails": ...} and POSTed to
 * the /createExtraction endpoint with a Bearer authorization header.
 *
 * @param string $token The authorization token for API access.
 * @param array $extractionDetails The details of the extraction to be created.
 * @param int $timeout Maximum number of seconds the request may take (CURLOPT_TIMEOUT). Defaults to 30.
 * @return mixed The raw (undecoded) response body string as returned by curl_exec,
 *               or a string starting with "Error: " when the payload could not be
 *               encoded or cURL reported an error.
 */
function createExtraction($token, $extractionDetails, $timeout = 30) {
    $url = 'https://api.extracta.ai/api/v1/createExtraction';

    // Prepare the payload first: json_encode() returns false on failure
    // (e.g. malformed UTF-8), and that must not be sent as the request body.
    $payload = json_encode(['extractionDetails' => $extractionDetails]);
    if ($payload === false) {
        return 'Error: Failed to encode payload: ' . json_last_error_msg();
    }

    // Initialize cURL session
    $ch = curl_init($url);

    // Set cURL options
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Content-Type: application/json',
        'Authorization: Bearer ' . $token,
    ]);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, 1);
    // Bound the request so a stalled connection cannot hang the script.
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

    try {
        // Execute cURL session
        $response = curl_exec($ch);

        // Check for cURL errors
        if (curl_errno($ch)) {
            throw new Exception('Curl error: ' . curl_error($ch));
        }

        // The body is returned as-is; callers can json_decode() it if they
        // need structured access.
        return $response;
    } catch (Exception $e) {
        // Errors are reported through the return value rather than rethrown.
        return 'Error: ' . $e->getMessage();
    } finally {
        // Always close the cURL session
        curl_close($ch);
    }
}

// Example usage: describe the extraction, submit it, and print the outcome.
$token = 'apiKey';

$extractionDetails = [
    'name' => 'CVs Extraction',
    'description' => '...',
    'language' => 'English',
    'options' => [
        'hasTable' => false,
        'handwrittenTextRecognition' => false,
    ],
    // One entry per value to pull out of each document.
    'fields' => [
        [
            'description' => '',
            'example' => '',
            'key' => 'name',
        ],
        [
            'description' => '',
            'example' => '',
            'key' => 'surname',
        ],
        [
            'description' => '',
            'example' => '',
            'key' => 'phone_number',
        ],
        [
            'description' => 'last job title name',
            'example' => 'Programmer',
            'key' => 'last_job_position',
        ],
        [
            'description' => 'the number of years in numbers',
            'example' => '6',
            'key' => 'years_of_experience',
        ],
    ],
];

try {
    $response = createExtraction($token, $extractionDetails);
    echo $response;
} catch (Exception $e) {
    echo 'Failed to create new extraction: ' . $e->getMessage();
}

?>
```

{% endtab %}
{% endtabs %}

## Responses

{% tabs %}
{% tab title="200" %}

```json
{
    "status": "created",
    "createdAt": 1712547789609,
    "extractionId": "extractionId"
}
```

{% endtab %}

{% tab title="400" %}

```json
{
    "status": "error",
    "message": "Language is required"
}
```

{% endtab %}

{% tab title="500" %}

```json
{
    "status": "error",
    "message": "Error creating extraction"
}
```

{% endtab %}
{% endtabs %}
