Export Weebly blog to WordPress XML
I needed to migrate a Weebly site to WordPress. Weebly lets you export all the web pages and images, but not the blog content. This script exports the blog posts into a WordPress XML (WXR) file that can be imported into WordPress.
Export the blog
You need to change the blog URL and add any categories you want to tag the posts with.
#!/usr/bin/env ruby
require "date"
require "time"
require "uri"

require "http"
require "nokogiri"
# Exports the posts of a Weebly blog into a WordPress WXR (extended RSS) file.
#
# Starting from the given blog page, the exporter writes one <item> per
# ".blog-post" element and then follows the "previous" pagination link until
# no further page is found.
class Export
  # File written by #call when no explicit output path is given.
  DEFAULT_OUTPUT = "rss.xml"

  # Attributes of the root <rss> element, including the WXR namespaces.
  RSS_ATTRIBUTES = {
    :version => "2.0",
    "xmlns:excerpt" => "http://wordpress.org/export/1.2/excerpt/",
    "xmlns:content" => "http://purl.org/rss/1.0/modules/content/",
    "xmlns:wfw" => "http://wellformedweb.org/CommentAPI/",
    "xmlns:dc" => "http://purl.org/dc/elements/1.1/",
    "xmlns:wp" => "http://wordpress.org/export/1.2/"
  }.freeze

  # Date format used by the ".date-text" element of a post (e.g. "01/31/2020").
  POST_DATE_FORMAT = "%m/%d/%Y"

  # Format written into <wp:post_date>.
  WP_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

  attr_reader :categories

  # @param url [String] URL of the first blog page to export
  # @param categories [Hash{String => String}] category slug => category name;
  #   every exported post is tagged with all of them
  # @param output [String] path of the XML file written by #call
  def initialize(url:, categories: {}, output: DEFAULT_OUTPUT)
    @url = url
    @categories = categories
    @output = output
  end

  # Fetches every blog page and writes the resulting WXR document to the
  # output file.
  #
  # @return [Integer] number of bytes written (the result of File.write)
  def call
    build_time = Time.now.rfc2822
    builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
      xml.rss(RSS_ATTRIBUTES) do
        xml.channel do
          xml["wp"].wxr_version "1.2"
          xml.title "Weebly Blog"
          xml.link @url.to_s
          xml.description ""
          xml.language "en"
          xml.pubDate build_time
          xml.lastBuildDate build_time
          parse_page url: @url, xml: xml
        end
      end
    end
    File.write(@output, builder.to_xml)
  end

  private

  # Exports the posts of the page at +url+ and of every older page reachable
  # through the "previous" pagination link.
  #
  # Iterates instead of recursing so long blogs do not grow the call stack, and
  # remembers visited URLs so a pagination loop cannot run forever.
  def parse_page(url:, xml:)
    visited = {}
    while url && !visited[url]
      visited[url] = true
      puts "parse: #{url}"
      html = Nokogiri::HTML.parse(get(url).to_s)
      html.css(".blog-post").each do |post|
        write_post(post: post, page_url: url, xml: xml)
      end
      url = previous_page_url(html)
    end
  end

  # Returns the absolute URL of the next-older page, or nil on the last page.
  def previous_page_url(html)
    nav_link = html.at_css(".blog-page-nav-previous .blog-link")
    href = nav_link && nav_link["href"]
    href && absolute_url(href)
  end

  # Writes a single post as an <item> element.
  #
  # Raises Date::Error when the post's date text does not match
  # POST_DATE_FORMAT; a post with a wrong date is worse than a loud failure.
  def write_post(post:, page_url:, xml:)
    title_links = post.css(".blog-title .blog-title-link")
    title = title_links.text.strip
    title_link = title_links.first
    # A post without a title link falls back to the page it was listed on
    # instead of crashing on a nil attribute.
    href = title_link && title_link["href"]
    link = absolute_url(href || page_url)

    date_text = post.css(".blog-date .date-text").text.strip
    published = Date.strptime(date_text, POST_DATE_FORMAT)

    content = fix_links(post_content(post))
    puts "Title: #{title}: #{published}: #{content.length} #{link}"

    xml.item do
      xml.title title
      xml.description
      xml.link link
      xml.guid(isPermaLink: "false") { xml.text(link) }
      categories.each do |id, name|
        xml.category(domain: "category", nicename: id) { xml.cdata(name) }
      end
      xml.pubDate published.rfc2822
      xml["dc"].creator "admin"
      xml["wp"].post_date published.strftime(WP_DATE_FORMAT)
      xml["wp"].status "publish"
      xml["wp"].post_type "post"
      xml["excerpt"].encoded { xml.cdata("") }
      xml["content"].encoded { xml.cdata(content) }
    end
  end

  # Returns the post's HTML: taken from the listing itself, or fetched from the
  # post's own page when the listing only shows a teaser with a "read more" link.
  def post_content(post)
    read_more = post.at_css(".blog-read-more a")
    read_more_href = read_more && read_more["href"]
    if read_more_href
      get_post_page_content(url: read_more_href)
    else
      post.css(".blog-content").inner_html
    end
  end

  # Rewrites the href of every <a> and the src of every <img> to an absolute URL.
  #
  # The content is parsed as a fragment so the returned HTML is not wrapped in
  # DOCTYPE/<html>/<body> markup, which would otherwise end up in every post.
  def fix_links(content)
    fragment = Nokogiri::HTML.fragment(content)
    { "a" => "href", "img" => "src" }.each do |tag, attribute|
      fragment.css(tag).each do |node|
        value = node[attribute]
        node[attribute] = absolute_url(value) if value
      end
    end
    fragment.to_html
  end

  # Performs the HTTP GET request for +url+.
  def get(url)
    HTTP.get(url)
  end

  # Fetches a post's own page and returns the inner HTML of its content element.
  def get_post_page_content(url:)
    url = absolute_url(url)
    puts "Reading from #{url}"
    html = Nokogiri::HTML.parse(get(url).to_s)
    html.css(".blog-post .blog-content").inner_html
  end

  # Resolves +url+ against the site root.
  #
  # A malformed URL (for example one containing a space) is returned unchanged
  # with a warning, so a single bad link does not abort the whole export.
  def absolute_url(url)
    URI.join(root_url, url).to_s
  rescue URI::InvalidURIError
    warn "Leaving malformed URL untouched: #{url}"
    url.to_s
  end

  # The blog URL reduced to scheme and host (path, query and fragment removed),
  # with the scheme forced to https. Memoized.
  def root_url
    @root_url ||= URI.parse(@url).tap do |uri|
      uri.path = ""
      uri.query = nil
      uri.fragment = nil
      uri.scheme = "https"
    end
  end
end
# Run the export. Change the blog URL and list the categories (slug => name)
# that every imported post should be tagged with.
blog_url = "https://www.example.org/outing-summaries-blog"
post_categories = { "category-slug" => "Category name" }
Export.new(url: blog_url, categories: post_categories).call
Images
For images, I used the website export to pull the image uploads, then ran a search and replace on the posts to have the uploads match the location in WordPress.
I just uploaded them all to the wp-content directory.
Webmentions
These are webmentions via the IndieWeb and webmention.io. Mention this post from your site: