2017-06-04 22:57:02 +10:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
require 'rails_helper'
|
|
|
|
|
2024-09-04 15:12:25 +10:00
|
|
|
# Specs for the entity-extraction helpers exposed on Extractor:
# mentions/lists, hashtags, and the combined entity extraction.
RSpec.describe Extractor do
  describe 'extract_mentions_or_lists_with_indices' do
    it 'returns an empty array if the given string does not have at signs' do
      text = 'a string without at signs'
      extracted = described_class.extract_mentions_or_lists_with_indices(text)
      expect(extracted).to eq []
    end

    it 'does not extract mentions which ends with particular characters' do
      # A trailing "@" directly after the name must suppress the match.
      text = '@screen_name@'
      extracted = described_class.extract_mentions_or_lists_with_indices(text)
      expect(extracted).to eq []
    end

    it 'returns mentions as an array' do
      text = '@screen_name'
      extracted = described_class.extract_mentions_or_lists_with_indices(text)
      expect(extracted).to eq [
        { screen_name: 'screen_name', indices: [0, 12] },
      ]
    end

    it 'yields mentions if a block is given' do
      text = '@screen_name'

      # Use a block probe rather than expectations inside the block: if the
      # method never yielded, inner expectations would be skipped and the
      # example would pass without verifying anything.
      expect { |probe| described_class.extract_mentions_or_lists_with_indices(text, &probe) }
        .to yield_with_args('screen_name', 0, 12)
    end
  end

  describe 'extract_hashtags_with_indices' do
    it 'returns an empty array if it does not have #' do
      text = 'a string without hash sign'
      extracted = described_class.extract_hashtags_with_indices(text)
      expect(extracted).to eq []
    end

    it 'does not exclude normal hash text before ://' do
      text = '#hashtag://'
      extracted = described_class.extract_hashtags_with_indices(text)
      expect(extracted).to eq [{ hashtag: 'hashtag', indices: [0, 8] }]
    end

    it 'excludes http://' do
      # Only the "hashtag" part (indices 0...8) is expected; the URL scheme
      # that follows must not become part of the tag.
      text = '#hashtaghttp://'
      extracted = described_class.extract_hashtags_with_indices(text)
      expect(extracted).to eq [{ hashtag: 'hashtag', indices: [0, 8] }]
    end

    it 'excludes https://' do
      text = '#hashtaghttps://'
      extracted = described_class.extract_hashtags_with_indices(text)
      expect(extracted).to eq [{ hashtag: 'hashtag', indices: [0, 8] }]
    end

    it 'yields hashtags if a block is given' do
      text = '#hashtag'

      # Block probe so the example fails when nothing is yielded (see the
      # matching mention example above for the rationale).
      expect { |probe| described_class.extract_hashtags_with_indices(text, &probe) }
        .to yield_with_args('hashtag', 0, 8)
    end
  end

  describe 'extract_entities_with_indices' do
    it 'returns empty array when cashtag present' do
      text = '$cashtag'
      extracted = described_class.extract_entities_with_indices(text)
      expect(extracted).to eq []
    end
  end
end
|