Ticket 766: Merge attribute_values and nodes tables into one master upstream jenkins-baltrad-db-427
authorAnders Henja <anders@henjab.se>
Fri, 25 Jan 2019 08:19:26 +0000 (09:19 +0100)
committerAnders Henja <anders@henjab.se>
Fri, 25 Jan 2019 08:19:26 +0000 (09:19 +0100)
server/itest/sqla/query_test.py
server/src/baltrad/bdbserver/sqla/backend.py
server/src/baltrad/bdbserver/sqla/migrate/versions/010_merge_nodes_and_attribute_values.py [new file with mode: 0644]
server/src/baltrad/bdbserver/sqla/query.py
server/src/baltrad/bdbserver/sqla/schema.py

index 4c2f568..24b48f4 100644 (file)
@@ -700,7 +700,6 @@ class TestAttributeQuery(object):
         self.query.fetch = {
             "count_ysize" : expr.count(expr.attribute("where/ysize", "long"))
         }
-
         result = self.query.execute(self.backend)
         eq_(1, len(result))
         eq_({"count_ysize": 6}, result[0])
index 340968f..179c4db 100644 (file)
@@ -25,6 +25,7 @@ import stat
 import time
 import uuid
 import sys
+from sys import path
 if sys.version_info > (3,):
     long = int
     basestring = str
@@ -531,23 +532,34 @@ def insert_file(conn, meta, source_id):
         size=meta.bdb_file_size,
     ).inserted_primary_key[0]
 
-def insert_metadata(conn, meta, file_id):
-    node_ids = {}
def merge_dictionaries(x, y):
    """Return a new dict containing *x*'s entries updated with *y*'s.

    Keys present in both take the value from *y*; neither input is
    modified (py2/py3 compatible alternative to dict unpacking).
    """
    merged = dict(x)
    merged.update(y)
    return merged
 
def insert_metadata(conn, meta, file_id):
    """Bulk-insert all metadata nodes of *meta* into bdb_nodes for *file_id*.

    Builds one row per node -- the value_* columns are populated only for
    attribute nodes and stay NULL for groups/datasets -- and issues a
    single executemany insert instead of one insert per node.

    :param conn: database connection
    :param meta: oh5 metadata whose nodes are persisted
    :param file_id: primary key of the owning row in bdb_files
    """
    # Template of the merged value columns; non-attribute nodes keep NULLs.
    empty_values = {
        "value_long": None,
        "value_string": None,
        "value_double": None,
        "value_boolean": None,
        "value_date": None,
        "value_time": None,
    }

    rows = []
    for node in meta.iternodes():
        # /_bdb/* nodes are internal bookkeeping and are not persisted.
        if node.path().startswith("/_bdb"):
            continue

        values = dict(empty_values)
        if isinstance(node, oh5.Attribute):
            values.update(_get_attribute_sql_values(node))

        rows.append(dict(
            file_id=file_id,
            path=node.parent and node.parent.path() or "/",
            name=node.name,
            type=node.type_id,
            **values
        ))

    # Guard against an empty executemany parameter list, which raises or
    # is undefined in some SQLAlchemy versions (possible when every node
    # is under /_bdb).
    if rows:
        conn.execute(schema.nodes.insert(), rows)
 
 def _parse_date(datestr):
     if len(datestr) != 8 or not datestr.isdigit():
@@ -605,20 +617,21 @@ def _get_attribute_sql_values(node):
         )
     return values
 
-def insert_attribute_value(conn, node, node_id):
-    values = _get_attribute_sql_values(node)
-
-    conn.execute(
-        schema.attribute_values.insert(),
-        node_id=node_id,
-        **values
-    )
def create_nodename(row):
    """Return the full node path ("<path>/<name>") for a bdb_nodes row.

    Used as the key of the parent-lookup dict when rebuilding the node
    tree in _select_metadata.
    """
    name = row[schema.nodes.c.name]
    path = row[schema.nodes.c.path]
    result = path
    if name is not None:  # E714: was "not name is None"
        # Avoid a double slash when the parent path is the root ("/").
        if path[-1] != "/":
            result = "%s/%s" % (result, name)
        else:
            result = "%s%s" % (result, name)
    return result
 
 def _select_metadata(conn, file_id):
     qry = sql.select(
-        [schema.nodes, schema.attribute_values],
+        [schema.nodes],
         schema.nodes.c.file_id==file_id,
-        from_obj=schema.nodes.outerjoin(schema.attribute_values),
         order_by=[
             schema.nodes.c.id.asc()
         ]
@@ -630,15 +643,15 @@ def _select_metadata(conn, file_id):
 
     result = conn.execute(qry)
     row = result.fetchone()
-    nodes[row[schema.nodes.c.id]] = meta.root()
+    nodes[create_nodename(row)] = meta.root()
 
     for row in result:
-        node_id = row[schema.nodes.c.id]
-        parent_id = row[schema.nodes.c.parent_id]
-        parent = nodes[parent_id]
+        parent_path = row[schema.nodes.c.path]
         node = _create_node_from_row(row)
-        parent.add_child(node)
-        nodes[node_id] = node
+        if parent_path in nodes.keys():
+            parent = nodes[parent_path]
+            parent.add_child(node)
+        nodes[create_nodename(row)] = node
     
     return meta
  
@@ -647,11 +660,11 @@ def _create_node_from_row(row):
     name = row[schema.nodes.c.name]
     if type_ == oh5.Attribute.type_id:
         node = oh5.Attribute(name, None)
-        node.value = row[schema.attribute_values.c.value_string]
+        node.value = row[schema.nodes.c.value_string]
         if node.value is None:
-            node.value = row[schema.attribute_values.c.value_long]
+            node.value = row[schema.nodes.c.value_long]
         if node.value is None:
-            node.value = row[schema.attribute_values.c.value_double]
+            node.value = row[schema.nodes.c.value_double]
         return node
     elif type_ == oh5.Group.type_id:
         return oh5.Group(name)
diff --git a/server/src/baltrad/bdbserver/sqla/migrate/versions/010_merge_nodes_and_attribute_values.py b/server/src/baltrad/bdbserver/sqla/migrate/versions/010_merge_nodes_and_attribute_values.py
new file mode 100644 (file)
index 0000000..5983fa1
--- /dev/null
@@ -0,0 +1,141 @@
+'''
+Copyright (C) 2019- Swedish Meteorological and Hydrological Institute (SMHI)
+
+This file is part of baltrad-db.
+
+baltrad-db is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+baltrad-db is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with baltrad-db.  If not, see <http://www.gnu.org/licenses/>.
+
+'''
+## Merge bdb_nodes and bdb_attribute_values so that only one table is used. Keeping two tables roughly
+## doubles insert time, because each node must be inserted individually so that the corresponding
+## attribute_value row can reference the correct node id. That could probably be avoided with more
+## elaborate database logic, but it is better revisited if array searching is ever needed; for now every value is simply stored per node id.
+## @file
+## @author Anders Henja, SMHI
+## @date 2019-01-22
+
+import contextlib
+import os
+
+import progressbar
+
+from sqlalchemy import (
+  Column,
+  ForeignKey,
+  MetaData,
+  PrimaryKeyConstraint,
+  Table,
+  sql,
+  UniqueConstraint,  
+)
+
+from sqlalchemy.types import (
+    BigInteger,
+    Boolean,
+    Date,
+    Float,
+    Integer,
+    LargeBinary,
+    Text,
+    Time,
+)
+
+from sqlalchemy import (
+  Column,
+  MetaData,
+  Table,
+)
+
+from sqlalchemy.types import (
+  Integer,
+  Text,
+)
+
def upgrade(engine):
    """Fold bdb_attribute_values into bdb_nodes.

    Adds the six value_* columns to bdb_nodes, copies the attribute
    values over with one set-based UPDATE, then drops the now redundant
    bdb_attribute_values table.

    :param engine: SQLAlchemy engine bound to the bdb database
    """
    meta = MetaData(bind=engine)
    # Hand-written reflection of bdb_nodes, already including the columns
    # that are about to be created below.
    nodes = Table("bdb_nodes", meta,
      Column("id", Integer, primary_key=True),
      Column("parent_id", Integer, ForeignKey("bdb_nodes.id")),
      Column("name", Text, nullable=False),
      Column("type", Integer, nullable=False),
      Column("value_long", BigInteger),
      Column("value_string", Text),
      Column("value_double", Float),
      Column("value_boolean", Boolean),
      Column("value_date", Date),
      Column("value_time", Time),
    )

    attribute_values = Table("bdb_attribute_values", meta,
      Column("node_id", Integer,
        ForeignKey(nodes.c.id, ondelete="CASCADE"),
        primary_key=True),
      Column("value_long", BigInteger),
      Column("value_string", Text),
      Column("value_double", Float),
      Column("value_boolean", Boolean),
      Column("value_date", Date),
      Column("value_time", Time),
    )

    # sqlalchemy-migrate changeset API: add each column to the live table.
    nodes.c.value_long.create()
    nodes.c.value_string.create()
    nodes.c.value_double.create()
    nodes.c.value_boolean.create()
    nodes.c.value_date.create()
    nodes.c.value_time.create()

    # NOTE(review): "UPDATE ... FROM" is PostgreSQL-specific syntax; this
    # migration will not run on e.g. SQLite -- confirm supported backends.
    with contextlib.closing(engine.connect()) as conn:
        conn.execute(
            "update bdb_nodes bn set value_long = bav.value_long, "
            "value_string=bav.value_string, value_double=bav.value_double, "
            "value_boolean=bav.value_boolean, value_date=bav.value_date, "
            "value_time=bav.value_time "
            "from bdb_attribute_values bav where bn.id = bav.node_id")

    attribute_values.drop()
+
def downgrade(engine):
    """Recreate bdb_attribute_values and move the value_* columns back.

    Inverse of upgrade(): recreates bdb_attribute_values, copies the
    value columns out of bdb_nodes with one INSERT..SELECT, then drops
    the value_* columns from bdb_nodes again.

    :param engine: SQLAlchemy engine bound to the bdb database
    """
    meta = MetaData(bind=engine)
    nodes = Table("bdb_nodes", meta,
      Column("id", Integer, primary_key=True),
      Column("parent_id", Integer, ForeignKey("bdb_nodes.id")),
      Column("name", Text, nullable=False),
      Column("type", Integer, nullable=False),
      Column("value_long", BigInteger),
      Column("value_string", Text),
      Column("value_double", Float),
      Column("value_boolean", Boolean),
      Column("value_date", Date),
      Column("value_time", Time),
    )

    attribute_values = Table("bdb_attribute_values", meta,
      Column("node_id", Integer,
        ForeignKey(nodes.c.id, ondelete="CASCADE"),
        primary_key=True),
      Column("value_long", BigInteger),
      Column("value_string", Text),
      Column("value_double", Float),
      Column("value_boolean", Boolean),
      Column("value_date", Date),
      Column("value_time", Time),
    )

    attribute_values.create()

    # NOTE(review): this copies a row for EVERY node, while the pre-merge
    # table only held rows for attribute nodes; group/dataset nodes end up
    # as all-NULL value rows -- confirm whether a "where type = <attribute
    # type id>" filter is needed to restore the original contents exactly.
    with contextlib.closing(engine.connect()) as conn:
        conn.execute(
            "insert into bdb_attribute_values (node_id,value_long,value_string,"
            "value_double,value_boolean,value_date,value_time) (select id,"
            "value_long,value_string,value_double,value_boolean,value_date,"
            "value_time from bdb_nodes)")

    nodes.c.value_long.drop()
    nodes.c.value_string.drop()
    nodes.c.value_double.drop()
    nodes.c.value_boolean.drop()
    nodes.c.value_date.drop()
    nodes.c.value_time.drop()
index 387c011..69c42e5 100644 (file)
@@ -159,40 +159,34 @@ class ExprToSql(object):
         name = name.replace("/", "_")
         nelems = len(elems)
         groupname = "/".join(elems) #string.join(elems, "/")
-        
+
         value_alias_t = self._value_tables.setdefault(
             name + "_values",
-            schema.attribute_values.alias(name + "_values")
+            schema.nodes.alias(name + "_values")
         )
-        
+
         if not self.from_clause_contains(value_alias_t):
-            l0_node_alias = schema.nodes.alias(name + "_l0")
-            
             onclause_l0 = sql.and_(
-                l0_node_alias.c.file_id==schema.files.c.id,
-                l0_node_alias.c.name==attrname
+                value_alias_t.c.file_id==schema.files.c.id,
+                value_alias_t.c.name==attrname
             )
 
             if nelems > 0 and elems[0] == "":  #Formulated as /x/y/z, use absolute path
                 onclause_l0 = sql.and_(
                     onclause_l0,
-                    l0_node_alias.c.path == groupname
+                    value_alias_t.c.path == groupname
                 )
             elif nelems > 0 and elems[0] != "": # Formulated as x/y/z, use like filter (but not x/y)
                 onclause_l0 = sql.and_(
                     onclause_l0,
-                    l0_node_alias.c.path.like("%/"+groupname)
+                    value_alias_t.c.path.like("%/"+groupname)
                 )
                 
             self.from_clause = self.from_clause.outerjoin(
-                l0_node_alias,
+                value_alias_t,
                 onclause=onclause_l0
             )
 
-            self.from_clause = self.from_clause.outerjoin(
-                value_alias_t,
-                onclause=value_alias_t.c.node_id==l0_node_alias.c.id
-            )
         return getattr(value_alias_t.c, "value_" + type_)
     
     def from_clause_contains(self, table):
index 8cb4a68..b55e5e5 100644 (file)
@@ -81,19 +81,13 @@ nodes = Table("bdb_nodes", meta,
     Column("path", Text, nullable=False),
     Column("name", Text, nullable=False),
     Column("type", Integer, nullable=False),
-    UniqueConstraint("parent_id", "name"),
-)
-
-attribute_values = Table("bdb_attribute_values", meta,
-    Column("node_id", Integer,
-        ForeignKey(nodes.c.id, ondelete="CASCADE"),
-        primary_key=True),
     Column("value_long", BigInteger),
     Column("value_string", Text),
     Column("value_double", Float),
     Column("value_boolean", Boolean),
     Column("value_date", Date),
     Column("value_time", Time),
+    UniqueConstraint("parent_id", "name"),
 )
 
 filters = Table("bdb_filters", meta,