Wiki Spaces

Documentation
Projects
Resources

Get Help from Others

Q&A: Ask OpenMRS
Discussion: OpenMRS Talk
Real-Time: IRC Chat | Slack

Documentation

Page tree
Skip to end of metadata
Go to start of metadata

The following SQL queries (based on rules defined within this Talk discussion) may be used to check for concept validation rule violations within an OpenMRS database. Any results (other than an empty set) from any of the queries represents validation issues.

-- ==========================================================
-- List concepts with multiple preferred names in same locale
-- ==========================================================
SELECT cn1.concept_id, locale, name, locale_preferred
FROM concept_name cn1
WHERE locale_preferred
AND NOT voided
AND EXISTS
	(SELECT COUNT(*) as n, cn2.concept_id, locale, name, locale_preferred
  FROM concept_name cn2
  WHERE cn2.concept_id = cn1.concept_id
  AND cn2.locale = cn1.locale
  AND locale_preferred
  AND NOT voided
  GROUP BY cn2.concept_id, locale
  HAVING n > 1)
ORDER BY cn1.concept_id, locale, name;

-- ========================================================================
-- List concepts with duplicate name in same locale (excluding short names)
-- ========================================================================
SELECT cn.concept_id, cn.name, cn.locale
FROM concept_name cn
INNER JOIN (
    SELECT concept_id, name, locale
    FROM concept_name
    WHERE concept_name_type <> 'SHORT' AND NOT voided
    GROUP BY concept_id, name, locale
    HAVING COUNT(*) > 1
) AS subquery
ON cn.concept_id = subquery.concept_id
  AND cn.name = subquery.name
  AND cn.locale = subquery.locale
WHERE cn.concept_name_type <> 'SHORT' AND NOT cn.voided;

-- ==============================================================
-- List concepts with more than one short name in the same locale
-- ==============================================================
SELECT concept_id, locale, name, concept_name_type
FROM concept_name cn1
WHERE concept_name_type = 'SHORT'
AND EXISTS (
  SELECT COUNT(*) as n, concept_id, locale
  FROM concept_name cn2
  WHERE cn2.concept_name_type = cn1.concept_name_type
  AND cn2.concept_id = cn1.concept_id
  AND cn2.locale = cn1.locale
  AND NOT cn2.voided
  GROUP BY cn2.concept_id, cn2.locale
  HAVING n > 1)
ORDER BY concept_id, locale, name;

-- ==========================================================
-- List any concept with short name marked as local preferred
-- ==========================================================
SELECT concept_id, name, locale, locale_preferred
FROM concept_name
WHERE concept_name_type = 'SHORT'
AND locale_preferred
AND NOT voided
ORDER BY concept_id, locale;

-- ======================================================================
-- List any concept with more than one fully specified name in any locale
-- ======================================================================
SELECT concept_id, locale, name, concept_name_type
FROM concept_name cn1
WHERE concept_name_type = 'FULLY_SPECIFIED'
AND EXISTS (
  SELECT COUNT(*) as n, concept_id, locale
  FROM concept_name cn2
  WHERE cn2.concept_name_type = cn1.concept_name_type
  AND cn2.concept_id = cn1.concept_id
  AND cn2.locale = cn1.locale
  AND NOT cn2.voided
  GROUP BY cn2.concept_id, cn2.locale
  HAVING n > 1)
ORDER BY concept_id, locale, name;

-- =======================================================================
-- List any concepts that do not have a fully specified name in any locale
-- =======================================================================
SELECT DISTINCT concept_id
FROM concept_name cn1
WHERE NOT EXISTS (
  SELECT null FROM concept_name cn2
  WHERE cn2.concept_id = cn1.concept_id
  AND concept_name_type = 'FULLY_SPECIFIED'
  AND NOT cn2.voided)
GROUP BY concept_id
ORDER BY concept_id;

-- =============================
-- List invalid values name type
-- =============================
SELECT concept_id, name, concept_name_type
FROM concept_name
WHERE concept_name_type NOT IN ('FULLY_SPECIFIED', 'INDEX_TERM', 'SHORT')
AND NOT voided
ORDER BY concept_name_type;

-- =========================================
-- List invalid/unexpected values for locale
-- =========================================
SELECT concept_id, name, locale
FROM concept_name
WHERE locale NOT IN (
  'am', 'ar', 'bn', 'en', 'es', 'fr', 'ht', 'id', 'it', 'km',
  'nl', 'om', 'pt', 'ru', 'rw', 'sw', 'ti', 'ur', 'vi')
AND NOT voided
ORDER BY locale;

-- ===============================================================================
-- List any duplicated fully specified names (ignoring short names and index term)
-- ===============================================================================
-- Create indexed table of unvoided names of unretired concepts
-- with lowercase name for faster case-insensitive name comparisons
-- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp)
DROP TABLE IF EXISTS concept_name_lower;
CREATE TABLE concept_name_lower (
  concept_id int(11) DEFAULT NULL,
  locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
  locale_preferred tinyint(1) DEFAULT 0,
  name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
  concept_name_type varchar(50) CHARACTER SET utf8mb4 DEFAULT NULL,
  lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  INDEX (concept_id),
  INDEX (locale),
  INDEX (lname),
  INDEX (name),
  INDEX (concept_name_type)
)
SELECT cn.concept_id, locale, locale_preferred, name, 
  concept_name_type, LOWER(name) AS lname
FROM concept_name cn
JOIN concept c ON c.concept_id = cn.concept_id
WHERE NOT voided
AND NOT retired;
 
-- List any duplicated fully specified names (ignoring short names and index terms)
SELECT cn1.concept_id, cn1.locale, cn1.locale_preferred, 
  cn1.name, cn1.concept_name_type
FROM concept_name_lower cn1
JOIN (
  SELECT cn2.lname, cn2.locale
  FROM concept_name_lower cn2
  WHERE (
    cn2.concept_name_type IS NULL
    AND cn2.locale_preferred
  )
  OR cn2.concept_name_type = 'FULLY_SPECIFIED'
  GROUP BY cn2.lname, cn2.locale
  HAVING COUNT(*) > 1
) dup ON dup.lname = cn1.lname AND dup.locale = cn1.locale
WHERE cn1.concept_name_type = 'FULLY_SPECIFIED'
OR EXISTS (
  SELECT NULL
  FROM concept_name_lower cn3
  WHERE cn3.lname = cn1.lname
  AND cn3.locale = cn1.locale
  AND cn3.concept_name_type = 'FULLY_SPECIFIED'
)
ORDER BY cn1.name, cn1.locale;
 
-- Clean up
DROP TABLE concept_name_lower;

-- ==========================================================
-- List any duplicated preferred names within the same locale
-- ==========================================================
-- Create indexed table of unvoided preferred names of unretired concepts 
-- with lowercase name for faster case-insensitive name comparisons
-- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp)
DROP TABLE IF EXISTS concept_name_preferred_lower;
CREATE TABLE concept_name_preferred_lower (
  concept_id int(11) DEFAULT NULL,
  locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
  name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
  lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  INDEX (concept_id),
  INDEX (locale),
  INDEX (lname),
  INDEX (name)
)
SELECT cn.concept_id, locale, name, LOWER(name) AS lname
FROM concept_name cn
JOIN concept c ON c.concept_id = cn.concept_id
WHERE locale_preferred
AND NOT voided
AND NOT retired;

-- List any duplicated preferred names within the same locale
SELECT cn1.concept_id, cn1.locale, cn1.name
FROM concept_name_preferred_lower cn1
JOIN (
  SELECT cn2.lname, cn2.locale
  FROM concept_name_preferred_lower cn2
  GROUP BY cn2.lname, cn2.locale
  HAVING COUNT(*) > 1
) AS duplicates ON cn1.lname = duplicates.lname AND cn1.locale = duplicates.locale
ORDER BY cn1.name, cn1.locale, cn1.concept_id;

-- Clean up
DROP TABLE concept_name_preferred_lower;

-- ===============================================================
-- List any concepts with UUID that is not 36 characters in length
-- ===============================================================
SELECT concept_id, uuid
FROM concept
WHERE length(uuid) != 36
AND NOT retired
ORDER BY concept_id;

-- =============================================================================
-- List any duplicate mappings (same concept mapped to same code more than once)
-- =============================================================================
SELECT
  m1.concept_map_id,
  m1.concept_id,
  (SELECT name FROM concept_name n WHERE n.concept_id=m1.concept_id AND locale='en' AND locale_preferred) as name,
  (SELECT name FROM concept_map_type t WHERE t.concept_map_type_id = m1.concept_map_type_id) as type,
  (SELECT name FROM concept_reference_source WHERE concept_source_id = r1.concept_source_id) as source,
  r1.code
FROM
  concept_reference_map m1
  INNER JOIN concept_reference_map m2 ON m1.concept_id = m2.concept_id
  LEFT OUTER JOIN concept_reference_term r1 ON r1.concept_reference_term_id = m1.concept_reference_term_id
  LEFT OUTER JOIN concept_reference_term r2 ON r2.concept_reference_term_id = m2.concept_reference_term_id
WHERE
  m1.concept_map_id != m2.concept_map_id AND
  r1.concept_source_id = r2.concept_source_id AND
  r1.code = r2.code
ORDER BY m1.concept_id, r1.code, type;
  • No labels

1 Comment

  1. Will eventually want to add in a validation check to find any synonyms within an FSN in their locale that match a preferred name or FSN from another concepts, since these synonyms (in the absence of an FSN in their locale) may be presumed to be preferred and/or an FSN by OpenMRS, causing naming conflicts. Since there are (as I'm writing this comment) nearly 900 cases in CIEL, we won't add this to the validation queries quite yet. Preserving the query in this comment for future reference:

    -- ===============================================================================
    -- List any synonyms that might be perceived as duplicates to preferred names
    -- ===============================================================================
    -- Create indexed table of unvoided names of unretired concepts
    -- with lowercase name for faster case-insensitive name comparisons
    -- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp)
    DROP TABLE IF EXISTS concept_name_lower;
    CREATE TABLE concept_name_lower (
      concept_id int(11) DEFAULT NULL,
      locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
      locale_preferred tinyint(1) DEFAULT 0,
      name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
      concept_name_type varchar(50) CHARACTER SET utf8mb4 DEFAULT NULL,
      lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
      INDEX (concept_id),
      INDEX (locale),
      INDEX (lname),
      INDEX (name),
      INDEX (concept_name_type)
    )
    SELECT cn.concept_id, locale, locale_preferred, name,
      concept_name_type, LOWER(name) AS lname
    FROM concept_name cn
    JOIN concept c ON c.concept_id = cn.concept_id
    WHERE NOT voided
    AND NOT retired;
    
    -- List any synonyms without a corresponding FSN that match another
    -- concept's preferred name or FSN
    SELECT
      a.concept_id,
      fsn.name AS fully_specified_name,
      a.name AS synonym,
      a.locale,
      c_match.concept_id AS matching_concept_id,
      (
        SELECT name FROM concept_name_lower
        WHERE concept_id=c_match.concept_id AND locale='en'
        AND concept_name_type='FULLY_SPECIFIED'
      ) as matching_concept_name
    FROM
      concept_name_lower a
    LEFT JOIN
      concept_name_lower fsn ON
      a.concept_id = fsn.concept_id
      AND fsn.locale = 'en'
      AND fsn.concept_name_type = 'FULLY_SPECIFIED'
    LEFT JOIN
      concept_name_lower b ON
      a.concept_id = b.concept_id
      AND b.locale = a.locale
      AND b.concept_name_type = 'FULLY_SPECIFIED'
    LEFT JOIN
      (
        SELECT c1.concept_id, c1.lname
        FROM concept_name_lower c1
        WHERE (
          c1.locale_preferred = 1
          OR c1.concept_name_type = 'FULLY_SPECIFIED'
        )
      ) AS c_match ON a.lname = c_match.lname
      AND a.concept_id != c_match.concept_id
    WHERE a.concept_name_type IS NULL -- Synonyms
    AND b.concept_id IS NULL -- No FSN for the same concept in the same locale
    AND c_match.concept_id IS NOT NULL;
    
    -- Clean up
    DROP concept_name_lower;