Collect domain-specific data for fine-tuning language models. Build instruction datasets, Q&A pairs, and specialized training corpora.
Off-the-shelf LLMs lack expertise in your specific domain. Fine-tuning with high-quality, domain-specific data creates models that truly understand your use case.
Building instruction datasets manually is expensive and slow. You need automated collection of examples that match your target format and domain.
Collect the data your model needs
Extract questions and answers from FAQs, forums, and documentation.
Generate instruction-response pairs for supervised fine-tuning (SFT) and RLHF.
Build specialized text corpora for continued pre-training.
Extract multi-turn conversations for chat model training.
Collect LLM fine-tuning data
# Extract FAQ question/answer pairs with the /ai/fields endpoint.
# --get sends the URL-encoded data as query-string parameters.
curl --get 'https://api.webscraping.ai/ai/fields' \
  --data-urlencode 'api_key=YOUR_API_KEY' \
  --data-urlencode 'url=https://support.example.com/faq' \
  --data-urlencode 'fields[qa_pairs]=Array of objects with question and answer fields for each FAQ item'
# Example response:
# {
#   "qa_pairs": [
#     {
#       "question": "How do I reset my password?",
#       "answer": "Click the 'Forgot Password' link on the login page..."
#     },
#     {
#       "question": "What payment methods do you accept?",
#       "answer": "We accept Visa, Mastercard, PayPal..."
#     }
#   ]
# }
# Install: pip install webscraping_ai
# Package: https://pypi.org/project/webscraping-ai/
from webscraping_ai import Client

FAQ_URL = "https://support.example.com/faq"
# Maps each requested field name to a plain-English description of its content.
FIELD_SPEC = {
    "qa_pairs": "Array of objects with question and answer fields for each FAQ item",
}

client = Client(api_key="YOUR_API_KEY")
result = client.fields(FAQ_URL, fields=FIELD_SPEC)
print(result)
# Example response:
# {
#   "qa_pairs": [
#     {
#       "question": "How do I reset my password?",
#       "answer": "Click the 'Forgot Password' link on the login page..."
#     },
#     {
#       "question": "What payment methods do you accept?",
#       "answer": "We accept Visa, Mastercard, PayPal..."
#     }
#   ]
# }
// Install: npm install webscraping-ai
// Package: https://www.npmjs.com/package/webscraping-ai
import { WebScrapingAI } from 'webscraping-ai';

// Page to scrape, and a description of each field to extract from it.
const url = 'https://support.example.com/faq';
const fields = {
  qa_pairs: 'Array of objects with question and answer fields for each FAQ item',
};

const client = new WebScrapingAI({ apiKey: 'YOUR_API_KEY' });
const result = await client.fields({ url, fields });
console.log(result);
// Example response:
// {
//   "qa_pairs": [
//     {
//       "question": "How do I reset my password?",
//       "answer": "Click the 'Forgot Password' link on the login page..."
//     },
//     {
//       "question": "What payment methods do you accept?",
//       "answer": "We accept Visa, Mastercard, PayPal..."
//     }
//   ]
// }
<?php
// Install: composer require webscraping-ai/webscraping-ai-php
// Package: https://packagist.org/packages/webscraping-ai/webscraping-ai-php
require 'vendor/autoload.php';

use WebScrapingAI\Client;

$faqUrl = 'https://support.example.com/faq';
// Field name => plain-English description of what to extract for it.
$fieldSpec = [
    'qa_pairs' => 'Array of objects with question and answer fields for each FAQ item',
];

$client = new Client('YOUR_API_KEY');
$result = $client->fields($faqUrl, $fieldSpec);
print_r($result);
// Example response:
// {
//   "qa_pairs": [
//     {
//       "question": "How do I reset my password?",
//       "answer": "Click the 'Forgot Password' link on the login page..."
//     },
//     {
//       "question": "What payment methods do you accept?",
//       "answer": "We accept Visa, Mastercard, PayPal..."
//     }
//   ]
// }
# Install: gem install webscraping_ai
# Gem page: https://rubygems.org/gems/webscraping_ai
require 'webscraping_ai'

faq_url = 'https://support.example.com/faq'
# Field name => plain-English description of what to extract for it.
field_spec = {
  qa_pairs: 'Array of objects with question and answer fields for each FAQ item'
}

client = WebScrapingAI::Client.new(api_key: 'YOUR_API_KEY')
result = client.fields(faq_url, fields: field_spec)
puts result.inspect
# Example response:
# {
#   "qa_pairs": [
#     {
#       "question": "How do I reset my password?",
#       "answer": "Click the 'Forgot Password' link on the login page..."
#     },
#     {
#       "question": "What payment methods do you accept?",
#       "answer": "We accept Visa, Mastercard, PayPal..."
#     }
#   ]
# }
// go get github.com/webscraping-ai/webscraping-ai-go/v4
// https://pkg.go.dev/github.com/webscraping-ai/webscraping-ai-go/v4
package main
import (
"context"
"fmt"
webscrapingai "github.com/webscraping-ai/webscraping-ai-go/v4"
)
func main() {
client, _ := webscrapingai.NewClient(&webscrapingai.Config{APIKey: "YOUR_API_KEY"})
result, _ := client.Fields(context.Background(), &webscrapingai.FieldsOptions{
URL: "https://support.example.com/faq",
Fields: map[string]string{
"qa_pairs": "Array of objects with question and answer fields for each FAQ item",
},
})
fmt.Println(result.Result)
}
// Response:
// {
// "qa_pairs": [
// {
// "question": "How do I reset my password?",
// "answer": "Click the 'Forgot Password' link on the login page..."
// },
// {
// "question": "What payment methods do you accept?",
// "answer": "We accept Visa, Mastercard, PayPal..."
// }
// ]
// }
// Maven coordinates: ai.webscraping:webscraping-ai:4.0.0
// Artifact page: https://central.sonatype.com/artifact/ai.webscraping/webscraping-ai
import ai.webscraping.Client;
import ai.webscraping.Config;
import ai.webscraping.option.FieldsOptions;
import ai.webscraping.result.FieldsResult;

String faqUrl = "https://support.example.com/faq";
// Plain-English description of what the "qa_pairs" field should contain.
String qaPairsSpec = "Array of objects with question and answer fields for each FAQ item";

Client client = new Client(Config.builder().apiKey("YOUR_API_KEY").build());
FieldsResult result = client.fields(
    FieldsOptions.builder()
        .url(faqUrl)
        .addField("qa_pairs", qaPairsSpec)
        .build());
System.out.println(result.getResult());
// Example response:
// {
//   "qa_pairs": [
//     {
//       "question": "How do I reset my password?",
//       "answer": "Click the 'Forgot Password' link on the login page..."
//     },
//     {
//       "question": "What payment methods do you accept?",
//       "answer": "We accept Visa, Mastercard, PayPal..."
//     }
//   ]
// }
// Install: dotnet add package WebScrapingAI
// Package: https://www.nuget.org/packages/WebScrapingAI
using WebScrapingAI;

var clientOptions = new WebScrapingAIClientOptions { ApiKey = "YOUR_API_KEY" };
var client = new WebScrapingAIClient(clientOptions);

// Page to scrape, plus a description of each field to extract from it.
var request = new FieldsRequest
{
    Url = "https://support.example.com/faq",
    Fields = new Dictionary<string, string>
    {
        ["qa_pairs"] = "Array of objects with question and answer fields for each FAQ item",
    },
};

var result = await client.FieldsAsync(request);
Console.WriteLine(result.Result);
// Example response:
// {
//   "qa_pairs": [
//     {
//       "question": "How do I reset my password?",
//       "answer": "Click the 'Forgot Password' link on the login page..."
//     },
//     {
//       "question": "What payment methods do you accept?",
//       "answer": "We accept Visa, Mastercard, PayPal..."
//     }
//   ]
// }
# Ask the /ai/question endpoint to turn a tutorial page into instruction-response pairs.
# The question contains literal double quotes (a JSON format hint), so that
# argument is single-quoted; inside double quotes the shell would strip the
# inner quotes and split the argument at the unquoted spaces.
curl -G "https://api.webscraping.ai/ai/question" \
  --data-urlencode "api_key=YOUR_API_KEY" \
  --data-urlencode "url=https://docs.example.com/tutorial/getting-started" \
  --data-urlencode 'question=Convert this tutorial into 5 instruction-response pairs. Format: {"instruction": "user request", "response": "assistant answer"}. Focus on practical tasks covered.'
# Install: pip install webscraping_ai
# Package: https://pypi.org/project/webscraping-ai/
from webscraping_ai import Client

TUTORIAL_URL = "https://docs.example.com/tutorial/getting-started"
# Single-quoted because the prompt itself contains double quotes (a JSON format
# hint); inside a double-quoted literal they would end the string early.
PROMPT = (
    'Convert this tutorial into 5 instruction-response pairs. '
    'Format: {"instruction": "user request", "response": "assistant answer"}. '
    'Focus on practical tasks covered.'
)

client = Client(api_key="YOUR_API_KEY")
answer = client.question(TUTORIAL_URL, question=PROMPT)
print(answer)
// Install: npm install webscraping-ai
// Package: https://www.npmjs.com/package/webscraping-ai
import { WebScrapingAI } from 'webscraping-ai';

// Page to read, and the question to ask about it.
const url = 'https://docs.example.com/tutorial/getting-started';
const question =
  'Convert this tutorial into 5 instruction-response pairs. Format: {"instruction": "user request", "response": "assistant answer"}. Focus on practical tasks covered.';

const client = new WebScrapingAI({ apiKey: 'YOUR_API_KEY' });
const answer = await client.question({ url, question });
console.log(answer);
<?php
// Install: composer require webscraping-ai/webscraping-ai-php
// Package: https://packagist.org/packages/webscraping-ai/webscraping-ai-php
require 'vendor/autoload.php';

use WebScrapingAI\Client;

// Page to read, and the question to ask about it.
$tutorialUrl = 'https://docs.example.com/tutorial/getting-started';
$prompt = 'Convert this tutorial into 5 instruction-response pairs. Format: {"instruction": "user request", "response": "assistant answer"}. Focus on practical tasks covered.';

$client = new Client('YOUR_API_KEY');
$answer = $client->question($tutorialUrl, $prompt);
echo $answer;
# Install: gem install webscraping_ai
# Gem page: https://rubygems.org/gems/webscraping_ai
require 'webscraping_ai'

# Page to read, and the question to ask about it.
tutorial_url = 'https://docs.example.com/tutorial/getting-started'
prompt = 'Convert this tutorial into 5 instruction-response pairs. Format: {"instruction": "user request", "response": "assistant answer"}. Focus on practical tasks covered.'

client = WebScrapingAI::Client.new(api_key: 'YOUR_API_KEY')
answer = client.question(tutorial_url, question: prompt)
puts answer
// go get github.com/webscraping-ai/webscraping-ai-go/v4
// https://pkg.go.dev/github.com/webscraping-ai/webscraping-ai-go/v4
package main
import (
"context"
"fmt"
webscrapingai "github.com/webscraping-ai/webscraping-ai-go/v4"
)
func main() {
client, _ := webscrapingai.NewClient(&webscrapingai.Config{APIKey: "YOUR_API_KEY"})
answer, _ := client.Question(context.Background(), &webscrapingai.QuestionOptions{
URL: "https://docs.example.com/tutorial/getting-started",
Question: "Convert this tutorial into 5 instruction-response pairs. Format: {"instruction": "user request", "response": "assistant answer"}. Focus on practical tasks covered.",
})
fmt.Println(answer)
}
// Maven coordinates: ai.webscraping:webscraping-ai:4.0.0
// Artifact page: https://central.sonatype.com/artifact/ai.webscraping/webscraping-ai
import ai.webscraping.Client;
import ai.webscraping.Config;
import ai.webscraping.option.QuestionOptions;

// The prompt embeds a JSON format hint, so its inner double quotes are
// escaped; unescaped, they would terminate the string literal early.
String prompt = "Convert this tutorial into 5 instruction-response pairs. "
    + "Format: {\"instruction\": \"user request\", \"response\": \"assistant answer\"}. "
    + "Focus on practical tasks covered.";

Client client = new Client(Config.builder().apiKey("YOUR_API_KEY").build());
String answer = client.question(QuestionOptions.builder()
    .url("https://docs.example.com/tutorial/getting-started")
    .question(prompt)
    .build());
System.out.println(answer);
// Install: dotnet add package WebScrapingAI
// Package: https://www.nuget.org/packages/WebScrapingAI
using WebScrapingAI;

// The prompt embeds a JSON format hint, so its inner double quotes are
// escaped; unescaped, they would terminate the string literal early.
var prompt = "Convert this tutorial into 5 instruction-response pairs. "
    + "Format: {\"instruction\": \"user request\", \"response\": \"assistant answer\"}. "
    + "Focus on practical tasks covered.";

var client = new WebScrapingAIClient(new WebScrapingAIClientOptions { ApiKey = "YOUR_API_KEY" });
var answer = await client.QuestionAsync(new QuestionRequest {
    Url = "https://docs.example.com/tutorial/getting-started",
    Question = prompt,
});
Console.WriteLine(answer);
# Extract glossary entries with the /ai/fields endpoint.
# --get sends the URL-encoded data as query-string parameters.
curl --get 'https://api.webscraping.ai/ai/fields' \
  --data-urlencode 'api_key=YOUR_API_KEY' \
  --data-urlencode 'url=https://medical-reference.com/glossary' \
  --data-urlencode 'fields[terms]=Array of {term, definition, usage_example} objects for each medical term'
# Install: pip install webscraping_ai
# Package: https://pypi.org/project/webscraping-ai/
from webscraping_ai import Client

GLOSSARY_URL = "https://medical-reference.com/glossary"
# Maps each requested field name to a plain-English description of its content.
FIELD_SPEC = {
    "terms": "Array of {term, definition, usage_example} objects for each medical term",
}

client = Client(api_key="YOUR_API_KEY")
result = client.fields(GLOSSARY_URL, fields=FIELD_SPEC)
print(result)
// Install: npm install webscraping-ai
// Package: https://www.npmjs.com/package/webscraping-ai
import { WebScrapingAI } from 'webscraping-ai';

// Page to scrape, and a description of each field to extract from it.
const url = 'https://medical-reference.com/glossary';
const fields = {
  terms: 'Array of {term, definition, usage_example} objects for each medical term',
};

const client = new WebScrapingAI({ apiKey: 'YOUR_API_KEY' });
const result = await client.fields({ url, fields });
console.log(result);
<?php
// Install: composer require webscraping-ai/webscraping-ai-php
// Package: https://packagist.org/packages/webscraping-ai/webscraping-ai-php
require 'vendor/autoload.php';

use WebScrapingAI\Client;

$glossaryUrl = 'https://medical-reference.com/glossary';
// Field name => plain-English description of what to extract for it.
$fieldSpec = [
    'terms' => 'Array of {term, definition, usage_example} objects for each medical term',
];

$client = new Client('YOUR_API_KEY');
$result = $client->fields($glossaryUrl, $fieldSpec);
print_r($result);
# Install: gem install webscraping_ai
# Gem page: https://rubygems.org/gems/webscraping_ai
require 'webscraping_ai'

glossary_url = 'https://medical-reference.com/glossary'
# Field name => plain-English description of what to extract for it.
field_spec = {
  terms: 'Array of {term, definition, usage_example} objects for each medical term'
}

client = WebScrapingAI::Client.new(api_key: 'YOUR_API_KEY')
result = client.fields(glossary_url, fields: field_spec)
puts result.inspect
// go get github.com/webscraping-ai/webscraping-ai-go/v4
// https://pkg.go.dev/github.com/webscraping-ai/webscraping-ai-go/v4
package main
import (
"context"
"fmt"
webscrapingai "github.com/webscraping-ai/webscraping-ai-go/v4"
)
func main() {
client, _ := webscrapingai.NewClient(&webscrapingai.Config{APIKey: "YOUR_API_KEY"})
result, _ := client.Fields(context.Background(), &webscrapingai.FieldsOptions{
URL: "https://medical-reference.com/glossary",
Fields: map[string]string{
"terms": "Array of {term, definition, usage_example} objects for each medical term",
},
})
fmt.Println(result.Result)
}
// Maven coordinates: ai.webscraping:webscraping-ai:4.0.0
// Artifact page: https://central.sonatype.com/artifact/ai.webscraping/webscraping-ai
import ai.webscraping.Client;
import ai.webscraping.Config;
import ai.webscraping.option.FieldsOptions;
import ai.webscraping.result.FieldsResult;

String glossaryUrl = "https://medical-reference.com/glossary";
// Plain-English description of what the "terms" field should contain.
String termsSpec = "Array of {term, definition, usage_example} objects for each medical term";

Client client = new Client(Config.builder().apiKey("YOUR_API_KEY").build());
FieldsResult result = client.fields(
    FieldsOptions.builder()
        .url(glossaryUrl)
        .addField("terms", termsSpec)
        .build());
System.out.println(result.getResult());
// Install: dotnet add package WebScrapingAI
// Package: https://www.nuget.org/packages/WebScrapingAI
using WebScrapingAI;

var clientOptions = new WebScrapingAIClientOptions { ApiKey = "YOUR_API_KEY" };
var client = new WebScrapingAIClient(clientOptions);

// Page to scrape, plus a description of each field to extract from it.
var request = new FieldsRequest
{
    Url = "https://medical-reference.com/glossary",
    Fields = new Dictionary<string, string>
    {
        ["terms"] = "Array of {term, definition, usage_example} objects for each medical term",
    },
};

var result = await client.FieldsAsync(request);
Console.WriteLine(result.Result);
Train models that understand your industry terminology
Fine-tune on documentation and code examples
Train on support conversations and FAQ data
Fine-tune for your brand voice and style
Get started with 1,000 free API credits. No credit card required.