Skip to content

[Data Liberation] Expose experimental Markdown importer in the importWxr step #2080

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Remove namespaces, lint
  • Loading branch information
adamziel committed Dec 17, 2024
commit 4a316896bf59c70584a89388f9a52efa8af1ce3c
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
<?php

use WordPress\DataLiberation\EntityReaders\WP_Directory_Tree_Entity_Reader;
use WordPress\Filesystem\WP_Filesystem;

class WP_Markdown_Importer extends WP_Stream_Importer {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
use League\CommonMark\Extension\Table\TableCell;
use League\CommonMark\Extension\Table\TableRow;
use League\CommonMark\Extension\Table\TableSection;
use WordPress\DataLiberation\Import\WP_Import_Utils;
use WordPress\Data_Liberation\Block_Markup\WP_Block_Markup_Converter;

class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter {
const STATE_READY = 'STATE_READY';
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
<?php

namespace WordPress\Data_Liberation\Block_Markup;

interface WP_Block_Markup_Converter {
public function convert();
public function get_block_markup();
public function get_all_metadata();
public function get_block_markup();
public function get_all_metadata();
public function get_meta_value( $key );
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* If the post cannot fit into memory, WordPress won't be able to render it
* anyway.
*/
class WP_Block_Markup_Processor extends WP_HTML_Tag_Processor {
class WP_Block_Markup_Processor extends \WP_HTML_Tag_Processor {

private $block_name;
protected $block_attributes;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
<?php

use WordPress\Data_Liberation\Block_Markup\WP_Block_Markup_Converter;
use WordPress\DataLiberation\Import\WP_Import_Utils;

/**
* A basic HTML markup to Block Markup converter.
* It only considers the markup and won't consider any visual
Expand All @@ -21,7 +18,7 @@ class WP_HTML_To_Blocks implements WP_Block_Markup_Converter {
private $metadata = array();

public function __construct( $html ) {
$this->html = new WP_HTML_Processor( $html );
$this->html = new \WP_HTML_Processor( $html );
}

public function convert() {
Expand Down Expand Up @@ -79,7 +76,7 @@ private function handle_tag() {
$this->metadata[ $key ][] = $value;
break;
case 'IMG':
$template = new WP_HTML_Tag_Processor( '<img>' );
$template = new \WP_HTML_Tag_Processor( '<img>' );
$template->next_tag();
foreach ( array( 'alt', 'title', 'src' ) as $attr ) {
if ( $html->get_attribute( $attr ) ) {
Expand Down Expand Up @@ -167,7 +164,7 @@ private function handle_tag() {

// Inline elements
case 'A':
$template = new WP_HTML_Tag_Processor( '<a>' );
$template = new \WP_HTML_Tag_Processor( '<a>' );
$template->next_tag();
if ( $html->get_attribute( 'href' ) ) {
$template->set_attribute( 'href', $html->get_attribute( 'href' ) );
Expand Down Expand Up @@ -320,7 +317,7 @@ private function append_rich_text( $html ) {

private function push_block( $name, $attributes = array() ) {
$this->close_ephemeral_paragraph();
$block = new WP_Block_Object( $name, $attributes );
$block = new \WP_Block_Object( $name, $attributes );
array_push( $this->block_stack, $block );
$this->block_markup .= WP_Import_Utils::block_opener( $block->block_name, $block->attrs ) . "\n";
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<?php

use Rowbot\URL\URL;

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<?php

/**
* Finds string fragments that look like URLs and allows replacing them.
* This is the first, "thick" sieve that yields "URL candidates" that must be
Expand Down Expand Up @@ -272,7 +273,7 @@ public function set_raw_url( $new_url ) {
$new_url = substr( $new_url, strpos( $new_url, '://' ) + 3 );
}
$this->raw_url = $new_url;
$this->lexical_updates[ $this->url_starts_at ] = new WP_HTML_Text_Replacement(
$this->lexical_updates[ $this->url_starts_at ] = new \WP_HTML_Text_Replacement(
$this->url_starts_at,
$this->url_length,
$new_url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,9 @@
*
* @TODO: Explore supporting a cursor to allow resuming from where we left off.
*/

namespace WordPress\DataLiberation\EntityReaders;

use WordPress\DataLiberation\Import\WP_Import_Utils;

class WP_Directory_Tree_Entity_Reader implements \Iterator {
private $file_visitor;
private $filesystem;
private $filesystem;
private $entity;

private $pending_directory_index;
Expand All @@ -24,33 +19,33 @@ class WP_Directory_Tree_Entity_Reader implements \Iterator {
private $next_post_id;
private $is_finished = false;
private $entities_read_so_far = 0;
private $allowed_extensions = array();
private $index_file_patterns = array();
private $allowed_extensions = array();
private $index_file_patterns = array();
private $markup_converter_factory;

static public function create(
public static function create(
\WordPress\Filesystem\WP_Abstract_Filesystem $filesystem,
$options
) {
if( ! isset( $options['root_dir'] ) ) {
if ( ! isset( $options['root_dir'] ) ) {
throw new \Exception( 'Missing required options: root_dir' );
}
if( ! isset( $options['first_post_id'] ) ) {
if ( ! isset( $options['first_post_id'] ) ) {
throw new \Exception( 'Missing required options: first_post_id' );
}
if( ! isset( $options['allowed_extensions'] ) ) {
if ( ! isset( $options['allowed_extensions'] ) ) {
throw new \Exception( 'Missing required options: allowed_extensions' );
}
if( ! isset( $options['index_file_patterns'] ) ) {
if ( ! isset( $options['index_file_patterns'] ) ) {
throw new \Exception( 'Missing required options: index_file_patterns' );
}
/**
* @TODO: Use `sub_entity_reader_factory` instead of `markup_converter_factory`
* and expect a WP_Entity_Reader factory, not a WP_Markup_Converter factory.
* This way we'll source all the relevant entity data such as post_meta
* from the files, not just the post_content.
* This way we'll source all the relevant entity data such as post_meta
* from the files, not just the post_content.
*/
if( ! isset( $options['markup_converter_factory'] ) ) {
if ( ! isset( $options['markup_converter_factory'] ) ) {
throw new \Exception( 'Missing required options: markup_converter_factory' );
}
return new self( $filesystem, $options );
Expand All @@ -60,11 +55,11 @@ private function __construct(
\WordPress\Filesystem\WP_Abstract_Filesystem $filesystem,
$options
) {
$this->file_visitor = new \WordPress\Filesystem\WP_Filesystem_Visitor( $filesystem, $options['root_dir'] );
$this->filesystem = $filesystem;
$this->next_post_id = $options['first_post_id'];
$this->allowed_extensions = $options['allowed_extensions'];
$this->index_file_patterns = $options['index_file_patterns'];
$this->file_visitor = new \WordPress\Filesystem\WP_Filesystem_Visitor( $filesystem, $options['root_dir'] );
$this->filesystem = $filesystem;
$this->next_post_id = $options['first_post_id'];
$this->allowed_extensions = $options['allowed_extensions'];
$this->index_file_patterns = $options['index_file_patterns'];
$this->markup_converter_factory = $options['markup_converter_factory'];
}

Expand Down Expand Up @@ -97,7 +92,7 @@ public function next_entity() {
// Move up to the corresponding directory
$missing_parent_path = $dir;
for ( $i = $missing_parent_id_depth; $i < $depth; $i++ ) {
$missing_parent_path = dirname($missing_parent_path);
$missing_parent_path = dirname( $missing_parent_path );
}

$this->parent_ids[ $missing_parent_id_depth ] = $this->emit_post_entity(
Expand All @@ -108,7 +103,7 @@ public function next_entity() {
'title_fallback' => WP_Import_Utils::slug_to_title( basename( $missing_parent_path ) ),
)
);
} else if ( false === $this->pending_directory_index ) {
} elseif ( false === $this->pending_directory_index ) {
// No directory index candidate – let's create a fake page
// just to have something in the page tree.
$this->parent_ids[ $depth ] = $this->emit_post_entity(
Expand All @@ -122,7 +117,7 @@ public function next_entity() {
// We're no longer looking for a directory index.
$this->pending_directory_index = null;
} else {
$file_path = $this->pending_directory_index;
$file_path = $this->pending_directory_index;
$this->parent_ids[ $depth ] = $this->emit_post_entity(
array(
'content' => $this->filesystem->read_file( $file_path ),
Expand All @@ -139,7 +134,7 @@ public function next_entity() {

while ( count( $this->pending_files ) ) {
$parent_id = $this->parent_ids[ $this->file_visitor->get_current_depth() ] ?? null;
$file_path = array_shift( $this->pending_files );
$file_path = array_shift( $this->pending_files );
$this->emit_post_entity(
array(
'content' => $this->filesystem->read_file( $file_path ),
Expand All @@ -164,27 +159,27 @@ public function get_entity(): ?\WP_Imported_Entity {
}

protected function emit_post_entity( $options ) {
$factory = $this->markup_converter_factory;
$factory = $this->markup_converter_factory;
$converter = $factory( $options['content'] );
$converter->convert();
$block_markup = $converter->get_block_markup();

$post_title = null;
if(!$post_title) {
if ( ! $post_title ) {
$removed_title = WP_Import_Utils::remove_first_h1_block_from_block_markup( $block_markup );
if ( false !== $removed_title ) {
$post_title = $removed_title['title'];
$post_title = $removed_title['title'];
$block_markup = $removed_title['remaining_html'];
}
}
if(!$post_title) {
if ( ! $post_title ) {
// In Markdown, the frontmatter title can be a worse title candidate than
// the first H1 block. In block markup exports, it will be the opposite.
//
// @TODO: Enable the API consumer to customize the title resolution.
$post_title = $converter->get_meta_value('post_title');
$post_title = $converter->get_meta_value( 'post_title' );
}
if(!$post_title) {
if ( ! $post_title ) {
$post_title = $options['title_fallback'];
}

Expand All @@ -194,7 +189,7 @@ protected function emit_post_entity( $options ) {
'guid' => $options['source_path'],
'post_title' => $post_title,
'post_content' => $block_markup,
'post_excerpt' => $converter->get_meta_value('post_excerpt') ?? '',
'post_excerpt' => $converter->get_meta_value( 'post_excerpt' ) ?? '',
'post_status' => 'publish',
);

Expand All @@ -214,20 +209,20 @@ protected function emit_post_entity( $options ) {
$entity_data['source_path'] = $source_path;
}

if ( $converter->get_meta_value('slug') ) {
$slug = $converter->get_meta_value('slug');
if ( $converter->get_meta_value( 'slug' ) ) {
$slug = $converter->get_meta_value( 'slug' );
$last_segment = substr( $slug, strrpos( $slug, '/' ) + 1 );
$entity_data['post_name'] = $last_segment;
}

if ( $converter->get_meta_value('post_order') ) {
$entity_data['post_order'] = $converter->get_meta_value('post_order');
if ( $converter->get_meta_value( 'post_order' ) ) {
$entity_data['post_order'] = $converter->get_meta_value( 'post_order' );
}

if ( $options['parent_id'] ) {
$entity_data['post_parent'] = $options['parent_id'];
}

$this->entity = new \WP_Imported_Entity( 'post', $entity_data );
++$this->next_post_id;
++$this->entities_read_so_far;
Expand All @@ -248,12 +243,12 @@ private function next_file() {
}

if ( $event->is_entering() ) {
$abs_paths = [];
foreach($event->files as $filename) {
$abs_paths[] = $event->dir . '/' . $filename;
}
$abs_paths = array();
foreach ( $event->files as $filename ) {
$abs_paths[] = $event->dir . '/' . $filename;
}
$this->pending_files = $this->choose_relevant_files( $abs_paths );
if( ! count($this->pending_files)) {
if ( ! count( $this->pending_files ) ) {
// Only consider directories with relevant files in them.
// Otherwise we'll create fake pages for media directories
// and other directories that don't contain any content.
Expand Down Expand Up @@ -295,9 +290,9 @@ protected function choose_directory_index( $files ) {
}

protected function looks_like_directory_index( $path ) {
$filename = basename($path);
foreach( $this->index_file_patterns as $pattern ) {
if( preg_match( $pattern, $filename ) ) {
$filename = basename( $path );
foreach ( $this->index_file_patterns as $pattern ) {
if ( preg_match( $pattern, $filename ) ) {
return true;
}
}
Expand All @@ -309,7 +304,7 @@ protected function choose_relevant_files( $paths ) {
}

protected function is_valid_file( $path ) {
$extension = pathinfo($path, PATHINFO_EXTENSION);
$extension = pathinfo( $path, PATHINFO_EXTENSION );
return in_array( $extension, $this->allowed_extensions, true );
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public function next_entity() {
* subtleties that will derail the process.
* Let's consider using WP_XML_Processor instead.
*/
$this->current_html_reader = new WP_HTML_Entity_Reader(
$this->current_html_reader = new \WP_HTML_Entity_Reader(
$html,
$this->current_post_id
);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php

abstract class WP_Entity_Reader implements Iterator {
abstract class WP_Entity_Reader implements \Iterator {

abstract public function get_entity();
abstract public function next_entity();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
<?php

use WordPress\Data_Liberation\Block_Markup\WP_HTML_To_Blocks;

/**
* Converts a single HTML file into a stream of WordPress entities.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
<?php

namespace WordPress\DataLiberation\Import;

/**
* A copy of the WP_Interactivity_API_Directives_Processor class
* from the Gutenberg plugin.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
<?php

namespace WordPress\DataLiberation\Import;

class WP_Import_Utils {

public static function block_opener( $block_name, $attrs = array() ) {
Expand Down