Profile picture Schedule a Meeting
c a n d l a n d . n e t

Export Weebly blog to WordPress XML

Dusty Candland | | wordpress, weebly, code, ruby

I needed to migrate a Weebly site to WordPress. Weebly lets you export all the web pages and images, but not the blog content. This script scrapes the blog and writes the posts to a WordPress XML file (rss.xml) that the WordPress importer can read.

Export the blog

You need to change the blog URL and add any categories you want to tag the posts with.

#!/usr/bin/env ruby

require "date"
require "uri"

require "http"
require "nokogiri"

# Scrapes a Weebly blog (its index pages plus any "read more" post pages) and
# writes the posts to +rss.xml+ as a WordPress WXR document.
#
# @example
#   Export.new(url: "https://example.org/blog", categories: {"slug" => "Name"}).call
class Export
  # RFC 822 style layout used for RSS +pubDate+ / +lastBuildDate+ elements.
  RSS_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %z"
  # Layout written into +wp:post_date+ (e.g. "2020-01-02 00:00:00").
  WP_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
  # Date layout shown next to each post on the Weebly blog pages.
  WEEBLY_DATE_FORMAT = "%m/%d/%Y"

  # @return [Hash{String => String}] category slug => display name, applied to every post
  attr_reader :categories

  # @param url [String] URL of the first (newest) blog index page
  # @param categories [Hash{String => String}] slug => name pairs added to every exported post
  def initialize(url:, categories: {})
    @url = url
    @categories = categories
  end

  # Builds the WXR document from every reachable blog page and writes it to
  # +rss.xml+ in the current directory.
  #
  # @return [Integer] number of bytes written (the result of File.write)
  def call
    builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
      xml.rss(:version => "2.0",
        "xmlns:excerpt" => "http://wordpress.org/export/1.2/excerpt/",
        "xmlns:content" => "http://purl.org/rss/1.0/modules/content/",
        "xmlns:wfw" => "http://wellformedweb.org/CommentAPI/",
        "xmlns:dc" => "http://purl.org/dc/elements/1.1/",
        "xmlns:wp" => "http://wordpress.org/export/1.2/") {
        xml.channel {
          xml["wp"].wxr_version "1.2"

          # One timestamp so pubDate and lastBuildDate always agree.
          built_at = Time.now.strftime(RSS_DATE_FORMAT)

          xml.title "Weebly Blog"
          xml.link @url.to_s
          xml.description ""
          xml.language "en"
          xml.pubDate built_at
          xml.lastBuildDate built_at
          parse_page url: @url, xml: xml
        }
      }
    end

    File.write("rss.xml", builder.to_xml)
  end

  private

  # Walks the blog index starting at +url+, following the "previous" link from
  # page to page, and appends one <item> per post to +xml+.
  #
  # @param url [String] index page to start from
  # @param xml [Nokogiri::XML::Builder] builder positioned inside <channel>
  def parse_page(url:, xml:)
    page_url = url

    # Iterative rather than recursive so a long archive does not grow the stack.
    while page_url
      puts "parse: #{page_url}"
      html = Nokogiri::HTML.parse(get(page_url).to_s)

      html.css(".blog-post").each { |post| write_post(post, xml) }

      previous = html.css(".blog-page-nav-previous .blog-link").attribute("href")
      page_url = previous && absolute_url(previous.value)
    end
  end

  # Appends a single WXR <item> for one ".blog-post" node.
  #
  # @param post [Nokogiri::XML::Node] the post's container element
  # @param xml [Nokogiri::XML::Builder]
  # @raise [Date::Error] when the post's date text is not in m/d/Y form
  def write_post(post, xml)
    title_links = post.css(".blog-title .blog-title-link")
    title = title_links.text.strip
    link = absolute_url(title_links.attribute("href").value)
    published_on = Date.strptime(post.css(".blog-date .date-text").text.strip, WEEBLY_DATE_FORMAT)
    content = fix_links(post_content(post))

    puts "Title: #{title}: #{published_on}: #{content.length} #{link}"
    xml.item {
      xml.title title
      xml.description
      xml.link link
      xml.guid(isPermaLink: "false") { xml.text(link) }
      categories.each do |id, name|
        xml.category(domain: "category", nicename: id) { xml.cdata(name) }
      end
      xml.pubDate published_on.strftime(RSS_DATE_FORMAT)
      xml["dc"].creator "admin"
      xml["wp"].post_date published_on.strftime(WP_DATE_FORMAT)
      xml["wp"].status "publish"
      xml["wp"].post_type "post"
      xml["excerpt"].encoded { xml.cdata("") }
      xml["content"].encoded { xml.cdata(content) }
    }
  end

  # Returns the post's HTML: the inline ".blog-content" markup, or, when the
  # post has a "read more" link, the content fetched from that linked page.
  #
  # @param post [Nokogiri::XML::Node]
  # @return [String]
  def post_content(post)
    read_more = post.css(".blog-read-more a").attribute("href")
    return post.css(".blog-content").inner_html if read_more.nil?

    get_post_page_content(url: read_more.value)
  end

  # Rewrites every <a href> and <img src> in +content+ to an absolute URL.
  #
  # Parsed and serialised as a fragment so the result is just the post markup,
  # not a full document wrapped in <!DOCTYPE>/<html>/<body>.
  #
  # @param content [String] HTML snippet
  # @return [String] the snippet with absolute link/image URLs
  def fix_links(content)
    fragment = Nokogiri::HTML.fragment(content)

    {"a" => "href", "img" => "src"}.each do |tag, attr_name|
      fragment.css(tag).each do |node|
        value = node[attr_name]
        node[attr_name] = absolute_url(value) if value
      end
    end

    fragment.to_html
  end

  # Fetches +url+, following redirects. A non-success status is reported on
  # stderr but the response is still returned, so the export stays best-effort.
  #
  # @param url [String]
  # @return [HTTP::Response]
  def get(url)
    response = HTTP.follow.get(url)
    warn "Unexpected HTTP status #{response.status} for #{url}" unless response.status.success?
    response
  end

  # Fetches a single post page and returns the inner HTML of its content node.
  #
  # @param url [String] absolute or site-relative URL of the post page
  # @return [String]
  def get_post_page_content(url:)
    url = absolute_url(url)
    puts "Reading from #{url}"
    html = Nokogiri::HTML.parse(get(url).to_s)
    html.css(".blog-post .blog-content").inner_html
  end

  # Resolves +url+ against the site root. Already-absolute URLs come back
  # unchanged; a value that is not a parseable URI (e.g. it contains a space)
  # is returned as-is so one bad link cannot abort the whole export.
  #
  # @param url [String]
  # @return [String]
  def absolute_url(url)
    URI.join(root_url, url).to_s
  rescue URI::InvalidURIError
    url
  end

  # The blog URL reduced to scheme + host, with the scheme forced to https.
  # Memoised because it is used for every link in every post.
  #
  # @return [URI::Generic]
  def root_url
    @root_url ||= URI.parse(@url).tap do |uri|
      uri.path = ""
      uri.query = nil
      uri.fragment = nil
      uri.scheme = "https"
    end
  end
end

# Run the export. Point `url` at the blog's index page and list the
# categories (slug => display name) that every imported post should receive.
exporter = Export.new(
  url: "https://www.example.org/outing-summaries-blog",
  categories: {"category-slug" => "Category name"}
)
exporter.call

Images

For images, I used the website export to pull the image uploads, then ran a search and replace on the posts to have the uploads match the location in WordPress.

I just uploaded them all to the wp-content directory.

Webmentions

These are webmentions via the IndieWeb and webmention.io. Mention this post from your site: