Clean up code style of Mastodon::TimestampId module (#5232)

* Clean up code style of Mastodon::TimestampId module * Update brakeman config
2017-10-06 03:42:21 +02:00 · 2017-10-06 03:42:21 +02:00 · eb5ac23434
commit eb5ac23434
parent a5143df303
3 changed files with 124 additions and 119 deletions
--- a/config/brakeman.ignore
+++ b/config/brakeman.ignore
@ -57,6 +57,26 @@
      "confidence": "Weak",
      "note": ""
    },
+    {
+      "warning_type": "SQL Injection",
+      "warning_code": 0,
+      "fingerprint": "34efc76883080f8b1110a30c34ec4f903946ee56651aae46c62477f45d4fc412",
+      "check_name": "SQL",
+      "message": "Possible SQL injection",
+      "file": "lib/mastodon/timestamp_ids.rb",
+      "line": 63,
+      "link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
+      "code": "connection.execute(\"        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n        RETURNS bigint AS\\n        $$\\n          DECLARE\\n            time_part bigint;\\n            sequence_base bigint;\\n            tail bigint;\\n          BEGIN\\n            time_part := (\\n              -- Get the time in milliseconds\\n              ((date_part('epoch', now()) * 1000))::bigint\\n              -- And shift it over two bytes\\n              << 16);\\n\\n            sequence_base := (\\n              'x' ||\\n              -- Take the first two bytes (four hex characters)\\n              substr(\\n                -- Of the MD5 hash of the data we documented\\n                md5(table_name ||\\n                  '#{SecureRandom.hex(16)}' ||\\n                  time_part::text\\n                ),\\n                1, 4\\n              )\\n            -- And turn it into a bigint\\n            )::bit(16)::bigint;\\n\\n            -- Finally, add our sequence number to our base, and chop\\n            -- it to the last two bytes\\n            tail := (\\n              (sequence_base + nextval(table_name || '_id_seq'))\\n              & 65535);\\n\\n            -- Return the time part and the sequence part. OR appears\\n            -- faster here than addition, but they're equivalent:\\n            -- time_part has no trailing two bytes, and tail is only\\n            -- the last two bytes.\\n            RETURN time_part | tail;\\n          END\\n        $$ LANGUAGE plpgsql VOLATILE;\\n\")",
+      "render_path": null,
+      "location": {
+        "type": "method",
+        "class": "Mastodon::TimestampIds",
+        "method": "define_timestamp_id"
+      },
+      "user_input": "SecureRandom.hex(16)",
+      "confidence": "Medium",
+      "note": ""
+    },
    {
      "warning_type": "Dynamic Render Path",
      "warning_code": 15,
@ -210,26 +230,6 @@
      "confidence": "Weak",
      "note": ""
    },
-    {
-      "warning_type": "SQL Injection",
-      "warning_code": 0,
-      "fingerprint": "cd440d9d0bcb76225f4142030cec0bdec6ad119c537c108c9d514bf87bc34d29",
-      "check_name": "SQL",
-      "message": "Possible SQL injection",
-      "file": "lib/mastodon/timestamp_ids.rb",
-      "line": 69,
-      "link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
-      "code": "ActiveRecord::Base.connection.execute(\"          CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n          RETURNS bigint AS\\n          $$\\n            DECLARE\\n              time_part bigint;\\n              sequence_base bigint;\\n              tail bigint;\\n            BEGIN\\n              -- Our ID will be composed of the following:\\n              -- 6 bytes (48 bits) of millisecond-level timestamp\\n              -- 2 bytes (16 bits) of sequence data\\n\\n              -- The 'sequence data' is intended to be unique within a\\n              -- given millisecond, yet obscure the 'serial number' of\\n              -- this row.\\n\\n              -- To do this, we hash the following data:\\n              -- * Table name (if provided, skipped if not)\\n              -- * Secret salt (should not be guessable)\\n              -- * Timestamp (again, millisecond-level granularity)\\n\\n              -- We then take the first two bytes of that value, and add\\n              -- the lowest two bytes of the table ID sequence number\\n              -- (`table_name`_id_seq). This means that even if we insert\\n              -- two rows at the same millisecond, they will have\\n              -- distinct 'sequence data' portions.\\n\\n              -- If this happens, and an attacker can see both such IDs,\\n              -- they can determine which of the two entries was inserted\\n              -- first, but not the total number of entries in the table\\n              -- (even mod 2**16).\\n\\n              -- The table name is included in the hash to ensure that\\n              -- different tables derive separate sequence bases so rows\\n              -- inserted in the same millisecond in different tables do\\n              -- not reveal the table ID sequence number for one another.\\n\\n              -- The secret salt is included in the hash to ensure that\\n              -- external users cannot derive the sequence base given the\\n              -- timestamp and table name, which would allow them to\\n              -- compute the table ID sequence number.\\n\\n              time_part := (\\n                -- Get the time in milliseconds\\n                ((date_part('epoch', now()) * 1000))::bigint\\n                -- And shift it over two bytes\\n                << 16);\\n\\n              sequence_base := (\\n                'x' ||\\n                -- Take the first two bytes (four hex characters)\\n                substr(\\n                  -- Of the MD5 hash of the data we documented\\n                  md5(table_name ||\\n                    '#{SecureRandom.hex(16)}' ||\\n                    time_part::text\\n                  ),\\n                  1, 4\\n                )\\n              -- And turn it into a bigint\\n              )::bit(16)::bigint;\\n\\n              -- Finally, add our sequence number to our base, and chop\\n              -- it to the last two bytes\\n              tail := (\\n                (sequence_base + nextval(table_name || '_id_seq'))\\n                & 65535);\\n\\n              -- Return the time part and the sequence part. OR appears\\n              -- faster here than addition, but they're equivalent:\\n              -- time_part has no trailing two bytes, and tail is only\\n              -- the last two bytes.\\n              RETURN time_part | tail;\\n            END\\n          $$ LANGUAGE plpgsql VOLATILE;\\n\")",
-      "render_path": null,
-      "location": {
-        "type": "method",
-        "class": "Mastodon::TimestampIds",
-        "method": "s(:self).define_timestamp_id"
-      },
-      "user_input": "SecureRandom.hex(16)",
-      "confidence": "Medium",
-      "note": ""
-    },
    {
      "warning_type": "Cross-Site Scripting",
      "warning_code": 4,
@ -269,6 +269,6 @@
      "note": ""
    }
  ],
-  "updated": "2017-10-05 20:06:40 +0200",
+  "updated": "2017-10-06 03:27:46 +0200",
  "brakeman_version": "4.0.1"
 }
--- a/lib/mastodon/timestamp_ids.rb
+++ b/lib/mastodon/timestamp_ids.rb
@ -1,120 +1,111 @@
 # frozen_string_literal: true

-module Mastodon
-  module TimestampIds
-    def self.define_timestamp_id
-      conn = ActiveRecord::Base.connection
+module Mastodon::TimestampIds
+  DEFAULT_REGEX = /timestamp_id\('(?<seq_prefix>\w+)'/

-      # Make sure we don't already have a `timestamp_id` function.
-      unless conn.execute(<<~SQL).values.first.first
-        SELECT EXISTS(
-          SELECT * FROM pg_proc WHERE proname = 'timestamp_id'
-        );
+  class << self
+    # Our ID will be composed of the following:
+    # 6 bytes (48 bits) of millisecond-level timestamp
+    # 2 bytes (16 bits) of sequence data
+    #
+    # The 'sequence data' is intended to be unique within a
+    # given millisecond, yet obscure the 'serial number' of
+    # this row.
+    #
+    # To do this, we hash the following data:
+    # * Table name (if provided, skipped if not)
+    # * Secret salt (should not be guessable)
+    # * Timestamp (again, millisecond-level granularity)
+    #
+    # We then take the first two bytes of that value, and add
+    # the lowest two bytes of the table ID sequence number
+    # (`table_name`_id_seq). This means that even if we insert
+    # two rows at the same millisecond, they will have
+    # distinct 'sequence data' portions.
+    #
+    # If this happens, and an attacker can see both such IDs,
+    # they can determine which of the two entries was inserted
+    # first, but not the total number of entries in the table
+    # (even mod 2**16).
+    #
+    # The table name is included in the hash to ensure that
+    # different tables derive separate sequence bases so rows
+    # inserted in the same millisecond in different tables do
+    # not reveal the table ID sequence number for one another.
+    #
+    # The secret salt is included in the hash to ensure that
+    # external users cannot derive the sequence base given the
+    # timestamp and table name, which would allow them to
+    # compute the table ID sequence number.
+    def define_timestamp_id
+      return if already_defined?
+
+      connection.execute(<<~SQL)
+        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
+        RETURNS bigint AS
+        $$
+          DECLARE
+            time_part bigint;
+            sequence_base bigint;
+            tail bigint;
+          BEGIN
+            time_part := (
+              -- Get the time in milliseconds
+              ((date_part('epoch', now()) * 1000))::bigint
+              -- And shift it over two bytes
+              << 16);
+
+            sequence_base := (
+              'x' ||
+              -- Take the first two bytes (four hex characters)
+              substr(
+                -- Of the MD5 hash of the data we documented
+                md5(table_name ||
+                  '#{SecureRandom.hex(16)}' ||
+                  time_part::text
+                ),
+                1, 4
+              )
+            -- And turn it into a bigint
+            )::bit(16)::bigint;
+
+            -- Finally, add our sequence number to our base, and chop
+            -- it to the last two bytes
+            tail := (
+              (sequence_base + nextval(table_name || '_id_seq'))
+              & 65535);
+
+            -- Return the time part and the sequence part. OR appears
+            -- faster here than addition, but they're equivalent:
+            -- time_part has no trailing two bytes, and tail is only
+            -- the last two bytes.
+            RETURN time_part | tail;
+          END
+        $$ LANGUAGE plpgsql VOLATILE;
      SQL
-        # The function doesn't exist, so we'll define it.
-        conn.execute(<<~SQL)
-          CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
-          RETURNS bigint AS
-          $$
-            DECLARE
-              time_part bigint;
-              sequence_base bigint;
-              tail bigint;
-            BEGIN
-              -- Our ID will be composed of the following:
-              -- 6 bytes (48 bits) of millisecond-level timestamp
-              -- 2 bytes (16 bits) of sequence data
-
-              -- The 'sequence data' is intended to be unique within a
-              -- given millisecond, yet obscure the 'serial number' of
-              -- this row.
-
-              -- To do this, we hash the following data:
-              -- * Table name (if provided, skipped if not)
-              -- * Secret salt (should not be guessable)
-              -- * Timestamp (again, millisecond-level granularity)
-
-              -- We then take the first two bytes of that value, and add
-              -- the lowest two bytes of the table ID sequence number
-              -- (`table_name`_id_seq). This means that even if we insert
-              -- two rows at the same millisecond, they will have
-              -- distinct 'sequence data' portions.
-
-              -- If this happens, and an attacker can see both such IDs,
-              -- they can determine which of the two entries was inserted
-              -- first, but not the total number of entries in the table
-              -- (even mod 2**16).
-
-              -- The table name is included in the hash to ensure that
-              -- different tables derive separate sequence bases so rows
-              -- inserted in the same millisecond in different tables do
-              -- not reveal the table ID sequence number for one another.
-
-              -- The secret salt is included in the hash to ensure that
-              -- external users cannot derive the sequence base given the
-              -- timestamp and table name, which would allow them to
-              -- compute the table ID sequence number.
-
-              time_part := (
-                -- Get the time in milliseconds
-                ((date_part('epoch', now()) * 1000))::bigint
-                -- And shift it over two bytes
-                << 16);
-
-              sequence_base := (
-                'x' ||
-                -- Take the first two bytes (four hex characters)
-                substr(
-                  -- Of the MD5 hash of the data we documented
-                  md5(table_name ||
-                    '#{SecureRandom.hex(16)}' ||
-                    time_part::text
-                  ),
-                  1, 4
-                )
-              -- And turn it into a bigint
-              )::bit(16)::bigint;
-
-              -- Finally, add our sequence number to our base, and chop
-              -- it to the last two bytes
-              tail := (
-                (sequence_base + nextval(table_name || '_id_seq'))
-                & 65535);
-
-              -- Return the time part and the sequence part. OR appears
-              -- faster here than addition, but they're equivalent:
-              -- time_part has no trailing two bytes, and tail is only
-              -- the last two bytes.
-              RETURN time_part | tail;
-            END
-          $$ LANGUAGE plpgsql VOLATILE;
-        SQL
-      end
    end

-    def self.ensure_id_sequences_exist
-      conn = ActiveRecord::Base.connection
-
+    def ensure_id_sequences_exist
      # Find tables using timestamp IDs.
-      default_regex = /timestamp_id\('(?<seq_prefix>\w+)'/
-      conn.tables.each do |table|
+      connection.tables.each do |table|
        # We're only concerned with "id" columns.
-        next unless (id_col = conn.columns(table).find { |col| col.name == 'id' })
+        next unless (id_col = connection.columns(table).find { |col| col.name == 'id' })

        # And only those that are using timestamp_id.
-        next unless (data = default_regex.match(id_col.default_function))
+        next unless (data = DEFAULT_REGEX.match(id_col.default_function))

        seq_name = data[:seq_prefix] + '_id_seq'
+
        # If we were on Postgres 9.5+, we could do CREATE SEQUENCE IF
        # NOT EXISTS, but we can't depend on that. Instead, catch the
        # possible exception and ignore it.
        # Note that seq_name isn't a column name, but it's a
        # relation, like a column, and follows the same quoting rules
        # in Postgres.
-        conn.execute(<<~SQL)
+        connection.execute(<<~SQL)
          DO $$
            BEGIN
-              CREATE SEQUENCE #{conn.quote_column_name(seq_name)};
+              CREATE SEQUENCE #{connection.quote_column_name(seq_name)};
            EXCEPTION WHEN duplicate_table THEN
              -- Do nothing, we have the sequence already.
            END
@ -122,5 +113,19 @@ module Mastodon
        SQL
      end
    end
+
+    private
+
+    def already_defined?
+      connection.execute(<<~SQL).values.first.first
+        SELECT EXISTS(
+          SELECT * FROM pg_proc WHERE proname = 'timestamp_id'
+        );
+      SQL
+    end
+
+    def connection
+      ActiveRecord::Base.connection
+    end
  end
 end
--- a/lib/tasks/db.rake
+++ b/lib/tasks/db.rake
@ -20,10 +20,10 @@ def each_schema_load_environment

  if Rails.env == 'development'
    test_conf = ActiveRecord::Base.configurations['test']
+
    if test_conf['database']&.present?
      ActiveRecord::Base.establish_connection(:test)
      yield
-
      ActiveRecord::Base.establish_connection(Rails.env.to_sym)
    end
  end