How to build a sitemap.xml in Node.js using Express

A good sitemap.xml will help you a lot in terms of SEO. It’s a nice starting point if you want to index your site. It’s just a standard XML file that search engines understand.

It can be as simple as this one:

<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
   <url>
      <loc>https://javaniceday.com/</loc>
      <lastmod>2019-11-18</lastmod>
      <changefreq>monthly</changefreq>
      <priority>0.8</priority>
   </url>
</urlset> 

You can see the complete list of tags definitions here: https://www.sitemaps.org/protocol.html

Let’s code a Node.js web app using the Express.js framework to expose an endpoint that prints a well-formed sitemap XML.

Install the Express application generator globally (the `express` command-line tool is provided by the express-generator package)

sudo npm install express-generator -g

Run this command to generate a default Express structure

express --no-view --git node-sitemap-xml

You should see something like this. It could be different depending on your Express version.

   create : node-sitemap-xml/
   create : node-sitemap-xml/public/
   create : node-sitemap-xml/public/javascripts/
   create : node-sitemap-xml/public/images/
   create : node-sitemap-xml/public/stylesheets/
   create : node-sitemap-xml/public/stylesheets/style.css
   create : node-sitemap-xml/routes/
   create : node-sitemap-xml/routes/index.js
   create : node-sitemap-xml/routes/users.js
   create : node-sitemap-xml/public/index.html
   create : node-sitemap-xml/.gitignore
   create : node-sitemap-xml/app.js
   create : node-sitemap-xml/package.json
   create : node-sitemap-xml/bin/
   create : node-sitemap-xml/bin/www

   change directory:
     $ cd node-sitemap-xml

   install dependencies:
     $ npm install

   run the app:
     $ DEBUG=node-sitemap-xml:* npm start

Enter the created folder

cd node-sitemap-xml

Install dependencies

npm install

Let’s add a dependency to convert JavaScript objects to XML

npm install js2xmlparser --save

Add Moment.js as a dependency to deal with dates. Moment.js is an awesome module to parse, format and manipulate dates. Strongly recommended.

npm install moment --save

Create a new file called sitemap.js in the routes folder and paste this content.

const express = require("express");
const router = express.Router();

const js2xmlparser = require("js2xmlparser");
const moment = require("moment");

/**
 * Generates a standard sitemap.xml for SEO purposes.
 *
 * Builds one <url> entry for the site root plus one entry per record returned
 * by getRecordsFromDataSource(), serializes the collection with js2xmlparser
 * and sends it as text/xml. Any error raised while building the document is
 * forwarded to the next Express handler via next(e).
 */
router.get("/", function(req, res, next) {
    try {
        // our records to index
        const records = getRecordsFromDataSource();

        // the root url is reported as modified today
        const today = moment().format("YYYY-MM-DD");

        // site root url
        const rootUrl = {
            loc: "https://javaniceday.com/",
            lastmod: today,
            changefreq: "daily",
            priority: "1.0",
            "image:image": {
                // must be an absolute URL including its scheme
                "image:loc": "https://javaniceday.com/default-image.jpg",
                "image:caption":
                    "javaniceday.com. Software development blog. Java, Node JS, Salesforce among other technologies",
            },
        };

        // one url entry per record, converted from our own structure to the sitemap one
        const recordUrls = records.map(function(record) {
            return {
                loc: record.url,
                lastmod: record.updated_at,
                "image:image": {
                    "image:loc": record.featured_image_url,
                    "image:caption": record.description,
                },
            };
        });

        const col = {
            // "@" holds the attributes of the root <urlset> element
            "@": {
                xmlns: "http://www.sitemaps.org/schemas/sitemap/0.9",
                "xmlns:image": "http://www.google.com/schemas/sitemap-image/1.1",
            },
            url: [rootUrl].concat(recordUrls),
        };
        const xml = js2xmlparser.parse("urlset", col);
        res.set("Content-Type", "text/xml");
        res.status(200);
        res.send(xml);
    } catch (e) {
        next(e);
    }
});

/**
 * Provides the records that should be listed in the sitemap.
 * Typically these would come from our database; here they are hard-coded.
 *
 * @return an array of records in our own structure (url, description,
 *         featured_image_url, updated_at); the caller converts them to the
 *         standard sitemap XML format
 */
function getRecordsFromDataSource() {
    // two sample records, returned untouched in our own shape
    return [
        {
            url: "https://javaniceday.com/2019/07/11/better-queue-in-node-js/",
            description:
                "Introduction A good practice in software development is to delegate as much heavy work as possible to background jobs",
            featured_image_url: "https://javaniceday.com/example1.jpg",
            updated_at: "2019-07-11",
        },
        {
            url: "https://javaniceday.com/2019/08/11/http-auth-basic-in-node-js-and-express/",
            description: "A small site in Node.js using Express that will have one protected page Http auth basic prompt",
            featured_image_url: "https://javaniceday.com/example1.jpg",
            updated_at: "2019-07-11",
        },
    ];
}

module.exports = router;

Basically, that code builds our sitemap XML dynamically. First, it creates a block for the home page, and then it iterates over our records, which can be a mix of different types of records such as recipes, news, cars, etc. You will have to modify it as necessary. Keep pagination in mind if you have a big collection of records.

As a last step modify the file app.js to add our route:

// load the router exported by routes/sitemap.js
const sitemapRouter = require("./routes/sitemap");
(...)
// mount it so that GET /sitemap.xml is handled by the sitemap router
app.use("/sitemap.xml", sitemapRouter);

Run locally

npm start

Open your browser at

http://localhost:3000/sitemap.xml

And you will see this output:

<?xml version='1.0'?>
<urlset xmlns='http://www.sitemaps.org/schemas/sitemap/0.9' xmlns:image='http://www.google.com/schemas/sitemap-image/1.1'>
    <url>
        <loc>https://javaniceday.com/</loc>
        <lastmod>2019-11-18</lastmod>
        <changefreq>daily</changefreq>
        <priority>1.0</priority>
        <image:image>
            <image:loc>https://javaniceday.com/default-image.jpg</image:loc>
            <image:caption>javaniceday.com. Software development blog. Java, Node JS, Salesforce among other technologies</image:caption>
        </image:image>
    </url>
    <url>
        <loc>https://javaniceday.com/2019/07/11/better-queue-in-node-js/</loc>
        <lastmod>2019-07-11</lastmod>
        <image:image>
            <image:loc>https://javaniceday.com/example1.jpg</image:loc>
            <image:caption>Introduction A good practice in software development is to delegate as much heavy work as possible to background jobs</image:caption>
        </image:image>
    </url>
    <url>
        <loc>https://javaniceday.com/2019/08/11/http-auth-basic-in-node-js-and-express/</loc>
        <lastmod>2019-07-11</lastmod>
        <image:image>
            <image:loc>https://javaniceday.com/example1.jpg</image:loc>
            <image:caption>A small site in Node.js using Express that will have one protected page Http auth basic prompt</image:caption>
        </image:image>
    </url>
</urlset>

After deploying the code to production, you’ll want to give visibility to your sitemap.xml. I recommend you submit your URL to the most used (nowadays) search engine: Google.

Go to https://search.google.com/search-console and submit your URL. Google will inspect it periodically.

Submit your sitemap.xml to Google Search Console

See the full source code here: https://github.com/andrescanavesi/node-js-sitemap

Photo by Annie Spratt on Unsplash

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s