Elasticsearch is a distributed system designed for storing, searching, and analyzing large volumes of data efficiently.
{ "_id": "1", "name": "Pizza", "price": 12.99, "category": "Entree", "ingredients": [ "dough", "sauce", "cheese" ] }
{ "_id": "2", "name": "Spaghetti", "price": 10.99, "category": "Entree", "ingredients": [ "eggs", "bacon", "parmesan cheese", "pepper" ] }
{ "_id": "1", "name": "leonardo dicaprio", "age": 48, "nationality": "American", "Occupations": [ "actor", "film producer" ], "height": 1.83 }
{ "_id": "2", "name": "John Smith", "age": 54, "nationality": "American", "Occupations": [ "actor", "rapper", "film producer" ], "height": 1.88 }
{ "_id": "1", "name": "Pizza", "price": 12.99, "category": "Entree", "ingredients": [ "dough", "sauce", "cheese", "toppings" ] }
Elasticsearch provides a powerful query language for searching and analyzing data in real time. Example: A librarian wants to find all books published in 2023 with "Elasticsearch" in the title. Elasticsearch quickly retrieves and ranks the results based on relevance.
from elasticsearch import Elasticsearch

# Open a client against the local cluster
es = Elasticsearch("http://localhost:9200")

# Name of the index that holds the movies
index_name = "movies"

# Cast entries are built separately so the document literal stays short
inception_cast = [
    {"name": "Leonardo DiCaprio", "role": "Dom Cobb"},
    {"name": "Joseph Gordon-Levitt", "role": "Arthur"},
    {"name": "Ellen Page", "role": "Ariadne"},
]

# The movie document that will be stored
movie_doc = {
    "title": "Inception",
    "director": "Christopher Nolan",
    "year": 2010,
    "genre": ["Sci-Fi", "Thriller"],
    "rating": 8.8,
    "cast": inception_cast,
}

# Store the document under an explicit ID (1) and show what the server answered
response = es.index(index=index_name, id=1, document=movie_doc)
print(response)
PUT /movies/_doc/1 { "title": "Inception", "director": "Christopher Nolan", "year": 2010, "genre": ["Sci-Fi", "Thriller"], "rating": 8.8, "cast": [ { "name": "Leonardo DiCaprio", "role": "Dom Cobb" }, { "name": "Joseph Gordon-Levitt", "role": "Arthur" }, { "name": "Ellen Page", "role": "Ariadne" } ] }
from elasticsearch import Elasticsearch

# Open a client against the local cluster
es = Elasticsearch("http://localhost:9200")

# Name of the index that holds the movies
index_name = "movies"

# Cast entries are built separately so the document literal stays short
interstellar_cast = [
    {"name": "Matthew McConaughey", "role": "Cooper"},
    {"name": "Anne Hathaway", "role": "Brand"},
    {"name": "Jessica Chastain", "role": "Murph"},
]

# The movie document that will be stored
movie_doc = {
    "title": "Interstellar",
    "director": "Christopher Nolan",
    "year": 2014,
    "genre": ["Sci-Fi", "Adventure"],
    "rating": 8.6,
    "cast": interstellar_cast,
}

# No `id` argument is passed to the index call here
response = es.index(index=index_name, document=movie_doc)

# Show what the server answered
print("Document Indexed:", response)
POST /movies/_doc { "title": "Interstellar", "director": "Christopher Nolan", "year": 2014, "genre": ["Sci-Fi", "Adventure"], "rating": 8.6, "cast": [ { "name": "Matthew McConaughey", "role": "Cooper" }, { "name": "Anne Hathaway", "role": "Brand" }, { "name": "Jessica Chastain", "role": "Murph" } ] }
from elasticsearch import Elasticsearch

# Open a client against the local cluster
es = Elasticsearch("http://localhost:9200")

# Which index to read from, and which document to fetch
index_name = "movies"
document_id = "Ht5qkoUBceh9VYVhszY2"  # Replace with the actual ID of your document

# Fetch the document by ID
response = es.get(index=index_name, id=document_id)

# Only the "_source" part of the response is printed
retrieved_source = response["_source"]
print("Document Retrieved:", retrieved_source)
GET /movies/_doc/Ht5qkoUBceh9VYVhszY2 # Replace with the actual ID of your document
Elasticsearch provides the _update endpoint, which allows us to modify specific fields in a document without resending the entire document.
from elasticsearch import Elasticsearch

# Connect to Elasticsearch
es = Elasticsearch("http://localhost:9200")

# Index name
index_name = "movies"

# Document ID
document_id = "Ht5qkoUBceh9VYVhszY2"  # Replace with the actual ID of your document

# Update data, shaped like the body of the REST _update request:
# the partial document lives under the "doc" key.
update_body = {
    "doc": {
        "rating": 9.0,  # Updating the rating
        # A partial update replaces an array as a whole, so every genre the
        # document should end up with is listed, not just the new one.
        "genre": ["Sci-Fi", "Adventure", "Drama"],
    }
}

# Update the document. The partial document is passed through the `doc`
# keyword, matching the keyword-argument style (`document=`) used when
# indexing; the catch-all `body=` parameter is deprecated in the 8.x client.
response = es.update(index=index_name, id=document_id, doc=update_body["doc"])

# Print the response
print("Update Response:", response)
POST /movies/_update/Ht5qkoUBceh9VYVhszY2 { "doc": { "rating": 9.0, // Updating the rating "genre": ["Sci-Fi", "Adventure", "Drama"] // Adding a new genre } }
To delete a document, use the DELETE request method and specify the document ID.
from elasticsearch import Elasticsearch

# Open a client against the local cluster
es = Elasticsearch("http://localhost:9200")

# Which index to delete from, and which document to remove
index_name = "movies"
document_id = "Ht5qkoUBceh9VYVhszY2"

# Remove the document and keep the server's answer
response = es.delete(index=index_name, id=document_id)

# Show what the server answered
print("Delete Response:", response)
DELETE /movies/_doc/Ht5qkoUBceh9VYVhszY2
from elasticsearch import Elasticsearch, helpers

# Connect to Elasticsearch
es = Elasticsearch("http://localhost:9200")

# Index name
index_name = "movies"

# Bulk operations.
# helpers.bulk() takes ONE dict per action (operation type in "_op_type"),
# not the alternating action-line / source-line pairs of the raw _bulk REST
# body. Passing the raw pairs would make the helper treat every line as a
# separate document to index.
actions = [
    # Create operation (fails if a document with this ID already exists)
    {
        "_op_type": "create",
        "_index": index_name,
        "_id": "1",
        "_source": {
            "title": "Inception",
            "director": "Christopher Nolan",
            "year": 2010,
            "genre": "Sci-Fi",
        },
    },
    # Index (insert or replace) operation
    {
        "_op_type": "index",
        "_index": index_name,
        "_id": "2",
        "_source": {
            "title": "The Matrix",
            "director": "The Wachowskis",
            "year": 1999,
            "genre": "Sci-Fi",
        },
    },
    # Update operation: only the fields under "doc" are changed
    {
        "_op_type": "update",
        "_index": index_name,
        "_id": "1",
        "doc": {"genre": "Science Fiction"},
    },
    # Delete operation (needs no document body)
    {
        "_op_type": "delete",
        "_index": index_name,
        "_id": "2",
    },
]

# Perform the bulk operation.
# With default settings the helper returns a tuple
# (number of successful actions, list of errors).
response = helpers.bulk(es, actions)

# Print the response
print("Bulk operation response:", response)
POST /_bulk Content-Type: application/json { "create": { "_index": "movies", "_id": "1" } } { "title": "Inception", "director": "Christopher Nolan", "year": 2010, "genre": "Sci-Fi" } { "index": { "_index": "movies", "_id": "2" } } { "title": "The Matrix", "director": "The Wachowskis", "year": 1999, "genre": "Sci-Fi" } { "update": { "_index": "movies", "_id": "1" } } { "doc": { "genre": "Science Fiction" } } { "delete": { "_index": "movies", "_id": "2" } }
Data in Document | Inferred Data Type |
---|---|
"text" (string) | text and keyword |
123 (integer) | long |
12.34 (float) | float |
true or false | boolean |
2025-01-22T10:00:00Z | date |
{ "key": "value" } | object |
[1, 2, 3] (array) | long (an array takes the type of its first non-null element) |
When you index a document in a new index without pre-defining a mapping, Elasticsearch applies dynamic mapping.
PUT /my_dynamic_index/_doc/1 { "name": "John Doe", "age": 30, "address": { "city": "New York", "zip": 10001 }, "tags": ["developer", "writer"], "created_at": "2025-01-22T10:00:00Z" }
You can view the generated mapping by querying:
GET /my_dynamic_index/_mapping
Response of dynamically created mapping:
{ "my_dynamic_index": { "mappings": { "properties": { "name": { "type": "text", "fields": { "keyword": { "type": "keyword" } } }, "age": { "type": "long" }, "address": { "properties": { "city": { "type": "text", "fields": { "keyword": { "type": "keyword" } } }, "zip": { "type": "long" } } }, "tags": { "type": "text", "fields": { "keyword": { "type": "keyword" } } }, "created_at": { "type": "date" } } } } }
Field Name | Type | Description |
---|---|---|
title | text | Stores the movie title; supports full-text search. |
genre | keyword | Stores the genre; exact match searches only. |
release_date | date | Stores the release date of the movie in a standard date format. |
rating | float | Stores the rating (e.g., IMDb score). |
cast | nested | Stores information about the cast members (e.g., name, role). |
PUT /movies { "mappings": { "properties": { "title": { "type": "text" }, "genre": { "type": "keyword" }, "release_date": { "type": "date" }, "rating": { "type": "float" }, "cast": { "type": "nested", "properties": { "name": { "type": "text" }, "role": { "type": "text" } } } } } }
In Elasticsearch, the main difference between keyword and text field types lies in how they are analyzed and used for searching. Both are used to store string data but are optimized for different use cases:
keyword Field Type:
Elasticsearch allows you to use multi-fields to index a single field as both text and keyword.
This is common when you need both exact matches and full-text search on the same field
Elasticsearch provides a flexible and powerful search engine that supports a wide range of search types. These search types are commonly classified into three categories:
PUT /book/_create/1 { "title": "The Indexing Companion", "author": "Glenda Browne", "publisher": "Information Today, Inc.", "year": 2007, "description": "A classic novel depicting the glamorous and decadent life of the Roaring Twenties." }
PUT /book/_create/2 { "title": "Indexing Books", "author": "Nancy C. Mulvany", "publisher": "University of Chicago Press", "year": 1994, "description": "A masterpiece of classic novel fiction, it explores themes of love, societal norms, and personal growth." }
PUT /book/_create/3 { "title": "The Catcher in the Rye", "author": "J.D. Salinger", "publisher": "Brown and Company", "year": 1951, "description": "Explore a collection of classic tales and novels from various literary periods." }
PUT /book/_create/4 { "title": "The Complete Book of Indexing", "author": "Nancy C. Mulvany", "publisher": "Information Today, Inc.", "year": 1991, "description": "Embark on an extraordinary journey through the pages of this book." }
Designed for searching unstructured or large text fields. They analyze the input and field data before searching.
GET my_index_name/_search { "query": { "match": { "my_field_name": "my_search_query" } } }
GET book/_search { "query": { "match": { "title": "Indexing books" } } }
{ "took": 19, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 }, "hits": { "total": { "value": 3, "relation": "eq" }, "max_score": 1.0998136, "hits": [ { "_index": "book", "_id": "2", "_score": 1.0998136, "_source": { "title": "Indexing Books", "author": "Nancy C. Mulvany", "publisher": "University of Chicago Press", "year": 1994 } }, { "_index": "book", "_id": "4", "_score": 0.9238435, "_source": { "title": "The Complete Book of Indexing", "author": "Nancy C. Mulvany", "publisher": "Information Today, Inc.", "year": 1991 } }, { "_index": "book", "_id": "1", "_score": 0.37365946, "_source": { "title": "The Indexing Companion", "author": "Glenda Browne", "publisher": "Information Today, Inc.", "year": 2007 } } ] } }
GET book/_search { "query": { "match_phrase": { "description": "classic novel" } } }
GET book/_search { "query": { "multi_match": { "query": "novel", "fields": ["title", "description"] } } }
{ "common": { "content": { "query": "common terms in text" } } }
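Used for structured data or exact matches. They don't analyze the search input; it is compared as-is against the terms stored in the index, so they work best on keyword, numeric, and date fields.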
GET book/_search { "query": { "term": { "publisher.keyword": "Information Today, Inc." } } }
{ "term": { "status": "active" } }
{ "terms": { "tags": ["sports", "technology"] } }
{ "range": { "price": { "gte": 10, "lte": 100 } } }
{ "exists": { "field": "email" } }
{ "prefix": { "name": "ela" } }
Combine multiple queries or apply logic to queries (e.g., AND, OR).
must, should, must_not, and filter.
{ "bool": { "must": { "match": { "content": "search" } }, "filter": { "term": { "status": "published" } } } }
{ "constant_score": { "filter": { "term": { "status": "active" } } } }
{ "dis_max": { "queries": [ { "match": { "title": "quick" } }, { "match": { "description": "quick" } } ] } }
{ "function_score": { "query": { "match": { "content": "Elasticsearch" } }, "functions": [ { "weight": 2, "field_value_factor": { "field": "popularity" } } ] } }
Full-text queries (match, multi_match, common).
Term-level queries (term, range, prefix, exists).
Compound queries (bool, constant_score, dis_max, function_score).
GET /book/_search { "query": { "match": { "title": { "query": "indexig book", "fuzziness": "2" } } } }
GET /products/_search { "query": { "match": { "description": { "query": "smartphone android", "operator": "and" } } } }
GET /products/_search { "query": { "bool": { "must": [ { "range": { "price": { "gte": 500, "lte": 1000 } } }, { "term": { "brand": "Apple" } } ] } } }
GET /products/_search { "query": { "bool": { "must": [ { "range": { "rating": { "gte": 4.5 } } }, { "range": { "price": { "gte": 500, "lte": 1000 } } }, { "term": { "category": "Electronics" } } ] } } }
GET /products/_search { "query": { "match_phrase": { "description": "premium ultrabook" } } }
GET /products/_search { "query": { "match": { "name": "Samsung" } } }
GET /products/_search { "query": { "prefix": { "seller": "Apple" } } }
GET /products/_search { "query": { "bool": { "must": [ { "match": { "name": { "query": "applle air", "fuzziness": "2" } } }, { "term": { "category": "Electronics" } }, { "range": { "price": { "gte": 500, "lte": 1000 } } } ] } } }
Aggregations in Elasticsearch are used to analyze and summarize data. Here's a detailed explanation of the key terms with examples:
A framework for performing data analysis on Elasticsearch documents.
Represents a group of documents based on certain criteria.
Example bucket: { "USA": 100, "India": 200 }
Used to calculate numerical values from document fields.
Aggregations that take the output of other aggregations as input.
Nests multiple aggregations together.
Performs aggregations across all documents, regardless of filters.